Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/LICENSE.txt +13 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/METADATA +250 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/RECORD +131 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/WHEEL +6 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/top_level.txt +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/__init__.py +77 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/from_thread.py +527 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/lowlevel.py +161 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/py.typed +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/pytest_plugin.py +191 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_interpreter.py +218 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_process.py +258 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_thread.py +69 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/_compat.py +11 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/parser_inline.py +147 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/port.yaml +48 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/token.py +180 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/LICENSE +202 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/METADATA +317 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/NOTICE +13 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/RECORD +18 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/WHEEL +6 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/top_level.txt +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/__init__.py +437 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_azurefs.pyx +134 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute.pyx +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute_docstrings.py +56 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_cuda.pyx +1080 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_dataset_parquet.pyx +1053 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_fs.pxd +91 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_hdfs.pyx +160 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pxd +36 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pyx +310 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_orc.pyx +445 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_parquet.pyx +2266 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_pyarrow_cpp_tests.pxd +33 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_substrait.pyx +481 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cffi.py +81 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/compute.py +744 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/config.pxi +95 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cuda.py +25 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/error.pxi +274 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/fs.py +431 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.pxi +1403 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.py +285 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/jvm.py +335 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib.pxd +770 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib_api.h +201 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/LICENSE.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Copyright aio-libs contributors.
|
| 2 |
+
|
| 3 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
you may not use this file except in compliance with the License.
|
| 5 |
+
You may obtain a copy of the License at
|
| 6 |
+
|
| 7 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
|
| 9 |
+
Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
See the License for the specific language governing permissions and
|
| 13 |
+
limitations under the License.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/METADATA
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.2
|
| 2 |
+
Name: aiohttp
|
| 3 |
+
Version: 3.11.13
|
| 4 |
+
Summary: Async http client/server framework (asyncio)
|
| 5 |
+
Home-page: https://github.com/aio-libs/aiohttp
|
| 6 |
+
Maintainer: aiohttp team <team@aiohttp.org>
|
| 7 |
+
Maintainer-email: team@aiohttp.org
|
| 8 |
+
License: Apache-2.0
|
| 9 |
+
Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org
|
| 10 |
+
Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org
|
| 11 |
+
Project-URL: CI: GitHub Actions, https://github.com/aio-libs/aiohttp/actions?query=workflow%3ACI
|
| 12 |
+
Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/aiohttp
|
| 13 |
+
Project-URL: Docs: Changelog, https://docs.aiohttp.org/en/stable/changes.html
|
| 14 |
+
Project-URL: Docs: RTD, https://docs.aiohttp.org
|
| 15 |
+
Project-URL: GitHub: issues, https://github.com/aio-libs/aiohttp/issues
|
| 16 |
+
Project-URL: GitHub: repo, https://github.com/aio-libs/aiohttp
|
| 17 |
+
Classifier: Development Status :: 5 - Production/Stable
|
| 18 |
+
Classifier: Framework :: AsyncIO
|
| 19 |
+
Classifier: Intended Audience :: Developers
|
| 20 |
+
Classifier: License :: OSI Approved :: Apache Software License
|
| 21 |
+
Classifier: Operating System :: POSIX
|
| 22 |
+
Classifier: Operating System :: MacOS :: MacOS X
|
| 23 |
+
Classifier: Operating System :: Microsoft :: Windows
|
| 24 |
+
Classifier: Programming Language :: Python
|
| 25 |
+
Classifier: Programming Language :: Python :: 3
|
| 26 |
+
Classifier: Programming Language :: Python :: 3.9
|
| 27 |
+
Classifier: Programming Language :: Python :: 3.10
|
| 28 |
+
Classifier: Programming Language :: Python :: 3.11
|
| 29 |
+
Classifier: Programming Language :: Python :: 3.12
|
| 30 |
+
Classifier: Programming Language :: Python :: 3.13
|
| 31 |
+
Classifier: Topic :: Internet :: WWW/HTTP
|
| 32 |
+
Requires-Python: >=3.9
|
| 33 |
+
Description-Content-Type: text/x-rst
|
| 34 |
+
License-File: LICENSE.txt
|
| 35 |
+
Requires-Dist: aiohappyeyeballs>=2.3.0
|
| 36 |
+
Requires-Dist: aiosignal>=1.1.2
|
| 37 |
+
Requires-Dist: async-timeout<6.0,>=4.0; python_version < "3.11"
|
| 38 |
+
Requires-Dist: attrs>=17.3.0
|
| 39 |
+
Requires-Dist: frozenlist>=1.1.1
|
| 40 |
+
Requires-Dist: multidict<7.0,>=4.5
|
| 41 |
+
Requires-Dist: propcache>=0.2.0
|
| 42 |
+
Requires-Dist: yarl<2.0,>=1.17.0
|
| 43 |
+
Provides-Extra: speedups
|
| 44 |
+
Requires-Dist: aiodns>=3.2.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "speedups"
|
| 45 |
+
Requires-Dist: Brotli; platform_python_implementation == "CPython" and extra == "speedups"
|
| 46 |
+
Requires-Dist: brotlicffi; platform_python_implementation != "CPython" and extra == "speedups"
|
| 47 |
+
|
| 48 |
+
==================================
|
| 49 |
+
Async http client/server framework
|
| 50 |
+
==================================
|
| 51 |
+
|
| 52 |
+
.. image:: https://raw.githubusercontent.com/aio-libs/aiohttp/master/docs/aiohttp-plain.svg
|
| 53 |
+
:height: 64px
|
| 54 |
+
:width: 64px
|
| 55 |
+
:alt: aiohttp logo
|
| 56 |
+
|
| 57 |
+
|
|
| 58 |
+
|
| 59 |
+
.. image:: https://github.com/aio-libs/aiohttp/workflows/CI/badge.svg
|
| 60 |
+
:target: https://github.com/aio-libs/aiohttp/actions?query=workflow%3ACI
|
| 61 |
+
:alt: GitHub Actions status for master branch
|
| 62 |
+
|
| 63 |
+
.. image:: https://codecov.io/gh/aio-libs/aiohttp/branch/master/graph/badge.svg
|
| 64 |
+
:target: https://codecov.io/gh/aio-libs/aiohttp
|
| 65 |
+
:alt: codecov.io status for master branch
|
| 66 |
+
|
| 67 |
+
.. image:: https://img.shields.io/endpoint?url=https://codspeed.io/badge.json
|
| 68 |
+
:target: https://codspeed.io/aio-libs/aiohttp
|
| 69 |
+
:alt: Codspeed.io status for aiohttp
|
| 70 |
+
|
| 71 |
+
.. image:: https://badge.fury.io/py/aiohttp.svg
|
| 72 |
+
:target: https://pypi.org/project/aiohttp
|
| 73 |
+
:alt: Latest PyPI package version
|
| 74 |
+
|
| 75 |
+
.. image:: https://readthedocs.org/projects/aiohttp/badge/?version=latest
|
| 76 |
+
:target: https://docs.aiohttp.org/
|
| 77 |
+
:alt: Latest Read The Docs
|
| 78 |
+
|
| 79 |
+
.. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
|
| 80 |
+
:target: https://matrix.to/#/%23aio-libs:matrix.org
|
| 81 |
+
:alt: Matrix Room — #aio-libs:matrix.org
|
| 82 |
+
|
| 83 |
+
.. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
|
| 84 |
+
:target: https://matrix.to/#/%23aio-libs-space:matrix.org
|
| 85 |
+
:alt: Matrix Space — #aio-libs-space:matrix.org
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
Key Features
|
| 89 |
+
============
|
| 90 |
+
|
| 91 |
+
- Supports both client and server side of HTTP protocol.
|
| 92 |
+
- Supports both client and server Web-Sockets out-of-the-box and avoids
|
| 93 |
+
Callback Hell.
|
| 94 |
+
- Provides Web-server with middleware and pluggable routing.
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
Getting started
|
| 98 |
+
===============
|
| 99 |
+
|
| 100 |
+
Client
|
| 101 |
+
------
|
| 102 |
+
|
| 103 |
+
To get something from the web:
|
| 104 |
+
|
| 105 |
+
.. code-block:: python
|
| 106 |
+
|
| 107 |
+
import aiohttp
|
| 108 |
+
import asyncio
|
| 109 |
+
|
| 110 |
+
async def main():
|
| 111 |
+
|
| 112 |
+
async with aiohttp.ClientSession() as session:
|
| 113 |
+
async with session.get('http://python.org') as response:
|
| 114 |
+
|
| 115 |
+
print("Status:", response.status)
|
| 116 |
+
print("Content-type:", response.headers['content-type'])
|
| 117 |
+
|
| 118 |
+
html = await response.text()
|
| 119 |
+
print("Body:", html[:15], "...")
|
| 120 |
+
|
| 121 |
+
asyncio.run(main())
|
| 122 |
+
|
| 123 |
+
This prints:
|
| 124 |
+
|
| 125 |
+
.. code-block::
|
| 126 |
+
|
| 127 |
+
Status: 200
|
| 128 |
+
Content-type: text/html; charset=utf-8
|
| 129 |
+
Body: <!doctype html> ...
|
| 130 |
+
|
| 131 |
+
Coming from `requests <https://requests.readthedocs.io/>`_ ? Read `why we need so many lines <https://aiohttp.readthedocs.io/en/latest/http_request_lifecycle.html>`_.
|
| 132 |
+
|
| 133 |
+
Server
|
| 134 |
+
------
|
| 135 |
+
|
| 136 |
+
An example using a simple server:
|
| 137 |
+
|
| 138 |
+
.. code-block:: python
|
| 139 |
+
|
| 140 |
+
# examples/server_simple.py
|
| 141 |
+
from aiohttp import web
|
| 142 |
+
|
| 143 |
+
async def handle(request):
|
| 144 |
+
name = request.match_info.get('name', "Anonymous")
|
| 145 |
+
text = "Hello, " + name
|
| 146 |
+
return web.Response(text=text)
|
| 147 |
+
|
| 148 |
+
async def wshandle(request):
|
| 149 |
+
ws = web.WebSocketResponse()
|
| 150 |
+
await ws.prepare(request)
|
| 151 |
+
|
| 152 |
+
async for msg in ws:
|
| 153 |
+
if msg.type == web.WSMsgType.text:
|
| 154 |
+
await ws.send_str("Hello, {}".format(msg.data))
|
| 155 |
+
elif msg.type == web.WSMsgType.binary:
|
| 156 |
+
await ws.send_bytes(msg.data)
|
| 157 |
+
elif msg.type == web.WSMsgType.close:
|
| 158 |
+
break
|
| 159 |
+
|
| 160 |
+
return ws
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
app = web.Application()
|
| 164 |
+
app.add_routes([web.get('/', handle),
|
| 165 |
+
web.get('/echo', wshandle),
|
| 166 |
+
web.get('/{name}', handle)])
|
| 167 |
+
|
| 168 |
+
if __name__ == '__main__':
|
| 169 |
+
web.run_app(app)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
Documentation
|
| 173 |
+
=============
|
| 174 |
+
|
| 175 |
+
https://aiohttp.readthedocs.io/
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
Demos
|
| 179 |
+
=====
|
| 180 |
+
|
| 181 |
+
https://github.com/aio-libs/aiohttp-demos
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
External links
|
| 185 |
+
==============
|
| 186 |
+
|
| 187 |
+
* `Third party libraries
|
| 188 |
+
<http://aiohttp.readthedocs.io/en/latest/third_party.html>`_
|
| 189 |
+
* `Built with aiohttp
|
| 190 |
+
<http://aiohttp.readthedocs.io/en/latest/built_with.html>`_
|
| 191 |
+
* `Powered by aiohttp
|
| 192 |
+
<http://aiohttp.readthedocs.io/en/latest/powered_by.html>`_
|
| 193 |
+
|
| 194 |
+
Feel free to make a Pull Request for adding your link to these pages!
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
Communication channels
|
| 198 |
+
======================
|
| 199 |
+
|
| 200 |
+
*aio-libs Discussions*: https://github.com/aio-libs/aiohttp/discussions
|
| 201 |
+
|
| 202 |
+
*Matrix*: `#aio-libs:matrix.org <https://matrix.to/#/#aio-libs:matrix.org>`_
|
| 203 |
+
|
| 204 |
+
We support `Stack Overflow
|
| 205 |
+
<https://stackoverflow.com/questions/tagged/aiohttp>`_.
|
| 206 |
+
Please add *aiohttp* tag to your question there.
|
| 207 |
+
|
| 208 |
+
Requirements
|
| 209 |
+
============
|
| 210 |
+
|
| 211 |
+
- attrs_
|
| 212 |
+
- multidict_
|
| 213 |
+
- yarl_
|
| 214 |
+
- frozenlist_
|
| 215 |
+
|
| 216 |
+
Optionally you may install the aiodns_ library (highly recommended for sake of speed).
|
| 217 |
+
|
| 218 |
+
.. _aiodns: https://pypi.python.org/pypi/aiodns
|
| 219 |
+
.. _attrs: https://github.com/python-attrs/attrs
|
| 220 |
+
.. _multidict: https://pypi.python.org/pypi/multidict
|
| 221 |
+
.. _frozenlist: https://pypi.org/project/frozenlist/
|
| 222 |
+
.. _yarl: https://pypi.python.org/pypi/yarl
|
| 223 |
+
.. _async-timeout: https://pypi.python.org/pypi/async_timeout
|
| 224 |
+
|
| 225 |
+
License
|
| 226 |
+
=======
|
| 227 |
+
|
| 228 |
+
``aiohttp`` is offered under the Apache 2 license.
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
Keepsafe
|
| 232 |
+
========
|
| 233 |
+
|
| 234 |
+
The aiohttp community would like to thank Keepsafe
|
| 235 |
+
(https://www.getkeepsafe.com) for its support in the early days of
|
| 236 |
+
the project.
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
Source code
|
| 240 |
+
===========
|
| 241 |
+
|
| 242 |
+
The latest developer version is available in a GitHub repository:
|
| 243 |
+
https://github.com/aio-libs/aiohttp
|
| 244 |
+
|
| 245 |
+
Benchmarks
|
| 246 |
+
==========
|
| 247 |
+
|
| 248 |
+
If you are interested in efficiency, the AsyncIO community maintains a
|
| 249 |
+
list of benchmarks on the official wiki:
|
| 250 |
+
https://github.com/python/asyncio/wiki/Benchmarks
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/RECORD
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiohttp-3.11.13.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
aiohttp-3.11.13.dist-info/LICENSE.txt,sha256=n4DQ2311WpQdtFchcsJw7L2PCCuiFd3QlZhZQu2Uqes,588
|
| 3 |
+
aiohttp-3.11.13.dist-info/METADATA,sha256=V_vS5w25_e4iRV4NQ4mjjJeaSpzg0IaWqru7XzJqJLc,7712
|
| 4 |
+
aiohttp-3.11.13.dist-info/RECORD,,
|
| 5 |
+
aiohttp-3.11.13.dist-info/WHEEL,sha256=siqMuoWpRueIZ87ijidBxnOwHeSOOcxNwYCs-pC4Yv0,151
|
| 6 |
+
aiohttp-3.11.13.dist-info/top_level.txt,sha256=iv-JIaacmTl-hSho3QmphcKnbRRYx1st47yjz_178Ro,8
|
| 7 |
+
aiohttp/.hash/_cparser.pxd.hash,sha256=hYa9Vje-oMs2eh_7MfCPOh2QW_1x1yCjcZuc7AmwLd0,121
|
| 8 |
+
aiohttp/.hash/_find_header.pxd.hash,sha256=_mbpD6vM-CVCKq3ulUvsOAz5Wdo88wrDzfpOsMQaMNA,125
|
| 9 |
+
aiohttp/.hash/_http_parser.pyx.hash,sha256=GBgZjCNbtZApPhf9-gHpS5Z2WMIzM-vgp5VSZIEvZfk,125
|
| 10 |
+
aiohttp/.hash/_http_writer.pyx.hash,sha256=-UgSF82qclpxjP0og_gcFEsstXRKF9e3Ou4wziAyDvI,125
|
| 11 |
+
aiohttp/.hash/hdrs.py.hash,sha256=v6IaKbsxjsdQxBzhb5AjP0x_9G3rUe84D7avf7AI4cs,116
|
| 12 |
+
aiohttp/__init__.py,sha256=612FENJ9aLTJsbeZNR9LF_PAJ4nLUY4NgyqlkDnO70c,7840
|
| 13 |
+
aiohttp/__pycache__/__init__.cpython-312.pyc,,
|
| 14 |
+
aiohttp/__pycache__/abc.cpython-312.pyc,,
|
| 15 |
+
aiohttp/__pycache__/base_protocol.cpython-312.pyc,,
|
| 16 |
+
aiohttp/__pycache__/client.cpython-312.pyc,,
|
| 17 |
+
aiohttp/__pycache__/client_exceptions.cpython-312.pyc,,
|
| 18 |
+
aiohttp/__pycache__/client_proto.cpython-312.pyc,,
|
| 19 |
+
aiohttp/__pycache__/client_reqrep.cpython-312.pyc,,
|
| 20 |
+
aiohttp/__pycache__/client_ws.cpython-312.pyc,,
|
| 21 |
+
aiohttp/__pycache__/compression_utils.cpython-312.pyc,,
|
| 22 |
+
aiohttp/__pycache__/connector.cpython-312.pyc,,
|
| 23 |
+
aiohttp/__pycache__/cookiejar.cpython-312.pyc,,
|
| 24 |
+
aiohttp/__pycache__/formdata.cpython-312.pyc,,
|
| 25 |
+
aiohttp/__pycache__/hdrs.cpython-312.pyc,,
|
| 26 |
+
aiohttp/__pycache__/helpers.cpython-312.pyc,,
|
| 27 |
+
aiohttp/__pycache__/http.cpython-312.pyc,,
|
| 28 |
+
aiohttp/__pycache__/http_exceptions.cpython-312.pyc,,
|
| 29 |
+
aiohttp/__pycache__/http_parser.cpython-312.pyc,,
|
| 30 |
+
aiohttp/__pycache__/http_websocket.cpython-312.pyc,,
|
| 31 |
+
aiohttp/__pycache__/http_writer.cpython-312.pyc,,
|
| 32 |
+
aiohttp/__pycache__/log.cpython-312.pyc,,
|
| 33 |
+
aiohttp/__pycache__/multipart.cpython-312.pyc,,
|
| 34 |
+
aiohttp/__pycache__/payload.cpython-312.pyc,,
|
| 35 |
+
aiohttp/__pycache__/payload_streamer.cpython-312.pyc,,
|
| 36 |
+
aiohttp/__pycache__/pytest_plugin.cpython-312.pyc,,
|
| 37 |
+
aiohttp/__pycache__/resolver.cpython-312.pyc,,
|
| 38 |
+
aiohttp/__pycache__/streams.cpython-312.pyc,,
|
| 39 |
+
aiohttp/__pycache__/tcp_helpers.cpython-312.pyc,,
|
| 40 |
+
aiohttp/__pycache__/test_utils.cpython-312.pyc,,
|
| 41 |
+
aiohttp/__pycache__/tracing.cpython-312.pyc,,
|
| 42 |
+
aiohttp/__pycache__/typedefs.cpython-312.pyc,,
|
| 43 |
+
aiohttp/__pycache__/web.cpython-312.pyc,,
|
| 44 |
+
aiohttp/__pycache__/web_app.cpython-312.pyc,,
|
| 45 |
+
aiohttp/__pycache__/web_exceptions.cpython-312.pyc,,
|
| 46 |
+
aiohttp/__pycache__/web_fileresponse.cpython-312.pyc,,
|
| 47 |
+
aiohttp/__pycache__/web_log.cpython-312.pyc,,
|
| 48 |
+
aiohttp/__pycache__/web_middlewares.cpython-312.pyc,,
|
| 49 |
+
aiohttp/__pycache__/web_protocol.cpython-312.pyc,,
|
| 50 |
+
aiohttp/__pycache__/web_request.cpython-312.pyc,,
|
| 51 |
+
aiohttp/__pycache__/web_response.cpython-312.pyc,,
|
| 52 |
+
aiohttp/__pycache__/web_routedef.cpython-312.pyc,,
|
| 53 |
+
aiohttp/__pycache__/web_runner.cpython-312.pyc,,
|
| 54 |
+
aiohttp/__pycache__/web_server.cpython-312.pyc,,
|
| 55 |
+
aiohttp/__pycache__/web_urldispatcher.cpython-312.pyc,,
|
| 56 |
+
aiohttp/__pycache__/web_ws.cpython-312.pyc,,
|
| 57 |
+
aiohttp/__pycache__/worker.cpython-312.pyc,,
|
| 58 |
+
aiohttp/_cparser.pxd,sha256=8jGIg-VJ9p3llwCakUYDsPGxA4HiZe9dmK9Jmtlz-5g,4318
|
| 59 |
+
aiohttp/_find_header.pxd,sha256=0GfwFCPN2zxEKTO1_MA5sYq2UfzsG8kcV3aTqvwlz3g,68
|
| 60 |
+
aiohttp/_headers.pxi,sha256=n701k28dVPjwRnx5j6LpJhLTfj7dqu2vJt7f0O60Oyg,2007
|
| 61 |
+
aiohttp/_http_parser.cpython-312-x86_64-linux-gnu.so,sha256=kZJwKEDTHDTxMWYND9KqRALWr8BaLSyyTBmQ6aRYuRA,2813904
|
| 62 |
+
aiohttp/_http_parser.pyx,sha256=wQdADj5LizwC_7nFGr8nIlk6GpoaQeQ0359H0HMKGuM,28241
|
| 63 |
+
aiohttp/_http_writer.cpython-312-x86_64-linux-gnu.so,sha256=6h2_x6dsZeFsvy_Iurvq9c-RfNLg-heJJAablu4uSSk,492232
|
| 64 |
+
aiohttp/_http_writer.pyx,sha256=fiCck_EVgRiTX7VtAoV2AldjuesJMFPev4TWd9Fx8jo,4597
|
| 65 |
+
aiohttp/_websocket/.hash/mask.pxd.hash,sha256=Y0zBddk_ck3pi9-BFzMcpkcvCKvwvZ4GTtZFb9u1nxQ,128
|
| 66 |
+
aiohttp/_websocket/.hash/mask.pyx.hash,sha256=90owpXYM8_kIma4KUcOxhWSk-Uv4NVMBoCYeFM1B3d0,128
|
| 67 |
+
aiohttp/_websocket/.hash/reader_c.pxd.hash,sha256=EoZjkF_tAFEbGvV0oRY2GZOSuAfWFWFjMhXgq6mQExo,132
|
| 68 |
+
aiohttp/_websocket/__init__.py,sha256=Mar3R9_vBN_Ea4lsW7iTAVXD7OKswKPGqF5xgSyt77k,44
|
| 69 |
+
aiohttp/_websocket/__pycache__/__init__.cpython-312.pyc,,
|
| 70 |
+
aiohttp/_websocket/__pycache__/helpers.cpython-312.pyc,,
|
| 71 |
+
aiohttp/_websocket/__pycache__/models.cpython-312.pyc,,
|
| 72 |
+
aiohttp/_websocket/__pycache__/reader.cpython-312.pyc,,
|
| 73 |
+
aiohttp/_websocket/__pycache__/reader_c.cpython-312.pyc,,
|
| 74 |
+
aiohttp/_websocket/__pycache__/reader_py.cpython-312.pyc,,
|
| 75 |
+
aiohttp/_websocket/__pycache__/writer.cpython-312.pyc,,
|
| 76 |
+
aiohttp/_websocket/helpers.py,sha256=P-XLv8IUaihKzDenVUqfKU5DJbWE5HvG8uhvUZK8Ic4,5038
|
| 77 |
+
aiohttp/_websocket/mask.cpython-312-x86_64-linux-gnu.so,sha256=rdCiOTakoDIcEYBPFRf7ncQUgXmhosFnNMxUYqJn8uA,265432
|
| 78 |
+
aiohttp/_websocket/mask.pxd,sha256=sBmZ1Amym9kW4Ge8lj1fLZ7mPPya4LzLdpkQExQXv5M,112
|
| 79 |
+
aiohttp/_websocket/mask.pyx,sha256=BHjOtV0O0w7xp9p0LNADRJvGmgfPn9sGeJvSs0fL__4,1397
|
| 80 |
+
aiohttp/_websocket/models.py,sha256=XAzjs_8JYszWXIgZ6R3ZRrF-tX9Q_6LiD49WRYojopM,2121
|
| 81 |
+
aiohttp/_websocket/reader.py,sha256=eC4qS0c5sOeQ2ebAHLaBpIaTVFaSKX79pY2xvh3Pqyw,1030
|
| 82 |
+
aiohttp/_websocket/reader_c.cpython-312-x86_64-linux-gnu.so,sha256=wt49Wc5GzT9rRbxIN-8pgo3mQdTthCSVaoQR4NI6OTQ,1871856
|
| 83 |
+
aiohttp/_websocket/reader_c.pxd,sha256=9rMWCpAC1jng7_gtqLjRlqQv9q7UkOn63tIQfq2k8Gc,2444
|
| 84 |
+
aiohttp/_websocket/reader_c.py,sha256=anZsBKZWlL8SO8gArsZMDstH37qBuZOvJA7jtj0Z95M,17975
|
| 85 |
+
aiohttp/_websocket/reader_py.py,sha256=anZsBKZWlL8SO8gArsZMDstH37qBuZOvJA7jtj0Z95M,17975
|
| 86 |
+
aiohttp/_websocket/writer.py,sha256=T3P36iMrzVPPC2XeScserHMD5vd9an6yizWzqDUkRZ0,7077
|
| 87 |
+
aiohttp/abc.py,sha256=JLMOxrKLGTDaPRLfraY1pl-xka53YiHhAH9yaF9QRXQ,6512
|
| 88 |
+
aiohttp/base_protocol.py,sha256=Tp8cxUPQvv9kUPk3w6lAzk6d2MAzV3scwI_3Go3C47c,3025
|
| 89 |
+
aiohttp/client.py,sha256=isdfGlM4O5ILr4F4gBABlybxo4MQ1tNaMm7zjMcrfrM,54309
|
| 90 |
+
aiohttp/client_exceptions.py,sha256=uyKbxI2peZhKl7lELBMx3UeusNkfpemPWpGFq0r6JeM,11367
|
| 91 |
+
aiohttp/client_proto.py,sha256=dV7u9floGWG-_xtD2fLUYqiANG6VsJtq0HMlTjf1g-g,10015
|
| 92 |
+
aiohttp/client_reqrep.py,sha256=VAgh0NxP2HvYWx6nX1Pr8FINc1m-W8-5q2zKeZV68n8,43925
|
| 93 |
+
aiohttp/client_ws.py,sha256=1CIjIXwyzOMIYw6AjUES4-qUwbyVHW1seJKQfg_Rta8,15109
|
| 94 |
+
aiohttp/compression_utils.py,sha256=0J3EAOR-0HehlYIudJXRu_Kr6hrYCY0IfuJ1px9MhQs,5681
|
| 95 |
+
aiohttp/connector.py,sha256=ZAXixLOyIl6zQEnetPLYKkbjP2BoHFEzGeEyvRBoGqI,60734
|
| 96 |
+
aiohttp/cookiejar.py,sha256=PYR1K1mkLa24Hm6c9UEJnAitccNzz97CbsJyQ2ULAlU,17615
|
| 97 |
+
aiohttp/formdata.py,sha256=CUJnCWDNHFcXSYZ_TupaT6rHkY-Q7ghssvWzaYBPIo0,6552
|
| 98 |
+
aiohttp/hdrs.py,sha256=2rj5MyA-6yRdYPhW5UKkW4iNWhEAlGIOSBH5D4FmKNE,5111
|
| 99 |
+
aiohttp/helpers.py,sha256=KqPQECeiJ_EhA93k7-5ZoVdZH0sk_4n0tCoM_E-iMnE,29091
|
| 100 |
+
aiohttp/http.py,sha256=8o8j8xH70OWjnfTWA9V44NR785QPxEPrUtzMXiAVpwc,1842
|
| 101 |
+
aiohttp/http_exceptions.py,sha256=RYmBycJvvPerKkgXXm8v145I1N-fbsgSpcsbNIC-gdE,2961
|
| 102 |
+
aiohttp/http_parser.py,sha256=UqerYPJzA1MqLmeG1jURhTNO1YhwUASK3QVcNEz0me8,36851
|
| 103 |
+
aiohttp/http_websocket.py,sha256=8VXFKw6KQUEmPg48GtRMB37v0gTK7A0inoxXuDxMZEc,842
|
| 104 |
+
aiohttp/http_writer.py,sha256=pRIyfOmL3cZmdWDWBBJ2cZEwEJzLWzlPPAJInaPLThI,7595
|
| 105 |
+
aiohttp/log.py,sha256=BbNKx9e3VMIm0xYjZI0IcBBoS7wjdeIeSaiJE7-qK2g,325
|
| 106 |
+
aiohttp/multipart.py,sha256=SABIvo3vhXzG4bLDZ0C4V3yG_86vAb-3Zb9Li7BVmI8,36944
|
| 107 |
+
aiohttp/payload.py,sha256=rCA9JJI_RMCik_7qNIaC1Bh21aXhABGYK2tsYeaHRQ4,15793
|
| 108 |
+
aiohttp/payload_streamer.py,sha256=ZzEYyfzcjGWkVkK3XR2pBthSCSIykYvY3Wr5cGQ2eTc,2211
|
| 109 |
+
aiohttp/py.typed,sha256=sow9soTwP9T_gEAQSVh7Gb8855h04Nwmhs2We-JRgZM,7
|
| 110 |
+
aiohttp/pytest_plugin.py,sha256=AfJ6VIWzsp5KgpXRREsX3yqGUZrJyfb7zzcMqzWxz7I,12768
|
| 111 |
+
aiohttp/resolver.py,sha256=sJ8-LYCtl_g9f6gn_5X2NFQ9FQ72Q2Mr4_rLxo9NVeI,6375
|
| 112 |
+
aiohttp/streams.py,sha256=U-qTkuAqIfpJChuKEy-vYn8nQ_Z1MVcW0WO2DHiJz_o,22329
|
| 113 |
+
aiohttp/tcp_helpers.py,sha256=BSadqVWaBpMFDRWnhaaR941N9MiDZ7bdTrxgCb0CW-M,961
|
| 114 |
+
aiohttp/test_utils.py,sha256=r7kBasmZtC3tQY5OmyMaIl1B9P8Bnnq1oM3npVcAPKs,22811
|
| 115 |
+
aiohttp/tracing.py,sha256=66XQwtdR5DHv8p953eeNL0l8o6iHDaNwH9bBaybHXD4,15137
|
| 116 |
+
aiohttp/typedefs.py,sha256=wUlqwe9Mw9W8jT3HsYJcYk00qP3EMPz3nTkYXmeNN48,1657
|
| 117 |
+
aiohttp/web.py,sha256=As5nqGQy4QXWMXSaOsh0JudSVVJVIt_nr3n0b8CaMb0,18422
|
| 118 |
+
aiohttp/web_app.py,sha256=Zre0QHM9JAp4d7jrj5NRxlPnfTrKLNuA42Rdsh8Q2TI,19554
|
| 119 |
+
aiohttp/web_exceptions.py,sha256=7nIuiwhZ39vJJ9KrWqArA5QcWbUdqkz2CLwEpJapeN8,10360
|
| 120 |
+
aiohttp/web_fileresponse.py,sha256=FRsS0p9r1KU5y8ceG0QXBYnrL6xggjbxcXSmI6qIR4k,16504
|
| 121 |
+
aiohttp/web_log.py,sha256=rX5D7xLOX2B6BMdiZ-chme_KfJfW5IXEoFwLfkfkajs,7865
|
| 122 |
+
aiohttp/web_middlewares.py,sha256=sFI0AgeNjdyAjuz92QtMIpngmJSOxrqe2Jfbs4BNUu0,4165
|
| 123 |
+
aiohttp/web_protocol.py,sha256=0MYjcaQishUyJxJ4lsH4IfHef4nIvHDf-DSZwI1Con4,25539
|
| 124 |
+
aiohttp/web_request.py,sha256=j_SSX9s-d3ZeNyqUTpFIaPUaNdSqHwb7yfc0ufL8xFA,29750
|
| 125 |
+
aiohttp/web_response.py,sha256=65aliDETi7rZ8P76ksuHQI0ZTu1cKpclCSailNu105M,28696
|
| 126 |
+
aiohttp/web_routedef.py,sha256=VT1GAx6BrawoDh5RwBwBu5wSABSqgWwAe74AUCyZAEo,6110
|
| 127 |
+
aiohttp/web_runner.py,sha256=v1G1nKiOOQgFnTSR4IMc6I9ReEFDMaHtMLvO_roDM-A,11786
|
| 128 |
+
aiohttp/web_server.py,sha256=-9WDKUAiR9ll-rSdwXSqG6YjaoW79d1R4y0BGSqgUMA,2888
|
| 129 |
+
aiohttp/web_urldispatcher.py,sha256=TIMxFmhLjERseG0xcZv2Ef9Xuo_GTBRqBqeMkCgL0K8,43825
|
| 130 |
+
aiohttp/web_ws.py,sha256=Gr-UWgau41P-OoJUb3WJvaNYiDESXzrHmIw1Cqonupc,22612
|
| 131 |
+
aiohttp/worker.py,sha256=0lvxRNMjGM47ddlQWtci53ri9YN42Su1Vdd_Z7zMMH0,8040
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: setuptools (75.8.0)
|
| 3 |
+
Root-Is-Purelib: false
|
| 4 |
+
Tag: cp312-cp312-manylinux_2_17_x86_64
|
| 5 |
+
Tag: cp312-cp312-manylinux2014_x86_64
|
| 6 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
aiohttp
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/__init__.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from ._core._eventloop import current_time as current_time
|
| 4 |
+
from ._core._eventloop import get_all_backends as get_all_backends
|
| 5 |
+
from ._core._eventloop import get_cancelled_exc_class as get_cancelled_exc_class
|
| 6 |
+
from ._core._eventloop import run as run
|
| 7 |
+
from ._core._eventloop import sleep as sleep
|
| 8 |
+
from ._core._eventloop import sleep_forever as sleep_forever
|
| 9 |
+
from ._core._eventloop import sleep_until as sleep_until
|
| 10 |
+
from ._core._exceptions import BrokenResourceError as BrokenResourceError
|
| 11 |
+
from ._core._exceptions import BrokenWorkerIntepreter as BrokenWorkerIntepreter
|
| 12 |
+
from ._core._exceptions import BrokenWorkerProcess as BrokenWorkerProcess
|
| 13 |
+
from ._core._exceptions import BusyResourceError as BusyResourceError
|
| 14 |
+
from ._core._exceptions import ClosedResourceError as ClosedResourceError
|
| 15 |
+
from ._core._exceptions import DelimiterNotFound as DelimiterNotFound
|
| 16 |
+
from ._core._exceptions import EndOfStream as EndOfStream
|
| 17 |
+
from ._core._exceptions import IncompleteRead as IncompleteRead
|
| 18 |
+
from ._core._exceptions import TypedAttributeLookupError as TypedAttributeLookupError
|
| 19 |
+
from ._core._exceptions import WouldBlock as WouldBlock
|
| 20 |
+
from ._core._fileio import AsyncFile as AsyncFile
|
| 21 |
+
from ._core._fileio import Path as Path
|
| 22 |
+
from ._core._fileio import open_file as open_file
|
| 23 |
+
from ._core._fileio import wrap_file as wrap_file
|
| 24 |
+
from ._core._resources import aclose_forcefully as aclose_forcefully
|
| 25 |
+
from ._core._signals import open_signal_receiver as open_signal_receiver
|
| 26 |
+
from ._core._sockets import connect_tcp as connect_tcp
|
| 27 |
+
from ._core._sockets import connect_unix as connect_unix
|
| 28 |
+
from ._core._sockets import create_connected_udp_socket as create_connected_udp_socket
|
| 29 |
+
from ._core._sockets import (
|
| 30 |
+
create_connected_unix_datagram_socket as create_connected_unix_datagram_socket,
|
| 31 |
+
)
|
| 32 |
+
from ._core._sockets import create_tcp_listener as create_tcp_listener
|
| 33 |
+
from ._core._sockets import create_udp_socket as create_udp_socket
|
| 34 |
+
from ._core._sockets import create_unix_datagram_socket as create_unix_datagram_socket
|
| 35 |
+
from ._core._sockets import create_unix_listener as create_unix_listener
|
| 36 |
+
from ._core._sockets import getaddrinfo as getaddrinfo
|
| 37 |
+
from ._core._sockets import getnameinfo as getnameinfo
|
| 38 |
+
from ._core._sockets import wait_readable as wait_readable
|
| 39 |
+
from ._core._sockets import wait_socket_readable as wait_socket_readable
|
| 40 |
+
from ._core._sockets import wait_socket_writable as wait_socket_writable
|
| 41 |
+
from ._core._sockets import wait_writable as wait_writable
|
| 42 |
+
from ._core._streams import create_memory_object_stream as create_memory_object_stream
|
| 43 |
+
from ._core._subprocesses import open_process as open_process
|
| 44 |
+
from ._core._subprocesses import run_process as run_process
|
| 45 |
+
from ._core._synchronization import CapacityLimiter as CapacityLimiter
|
| 46 |
+
from ._core._synchronization import (
|
| 47 |
+
CapacityLimiterStatistics as CapacityLimiterStatistics,
|
| 48 |
+
)
|
| 49 |
+
from ._core._synchronization import Condition as Condition
|
| 50 |
+
from ._core._synchronization import ConditionStatistics as ConditionStatistics
|
| 51 |
+
from ._core._synchronization import Event as Event
|
| 52 |
+
from ._core._synchronization import EventStatistics as EventStatistics
|
| 53 |
+
from ._core._synchronization import Lock as Lock
|
| 54 |
+
from ._core._synchronization import LockStatistics as LockStatistics
|
| 55 |
+
from ._core._synchronization import ResourceGuard as ResourceGuard
|
| 56 |
+
from ._core._synchronization import Semaphore as Semaphore
|
| 57 |
+
from ._core._synchronization import SemaphoreStatistics as SemaphoreStatistics
|
| 58 |
+
from ._core._tasks import TASK_STATUS_IGNORED as TASK_STATUS_IGNORED
|
| 59 |
+
from ._core._tasks import CancelScope as CancelScope
|
| 60 |
+
from ._core._tasks import create_task_group as create_task_group
|
| 61 |
+
from ._core._tasks import current_effective_deadline as current_effective_deadline
|
| 62 |
+
from ._core._tasks import fail_after as fail_after
|
| 63 |
+
from ._core._tasks import move_on_after as move_on_after
|
| 64 |
+
from ._core._testing import TaskInfo as TaskInfo
|
| 65 |
+
from ._core._testing import get_current_task as get_current_task
|
| 66 |
+
from ._core._testing import get_running_tasks as get_running_tasks
|
| 67 |
+
from ._core._testing import wait_all_tasks_blocked as wait_all_tasks_blocked
|
| 68 |
+
from ._core._typedattr import TypedAttributeProvider as TypedAttributeProvider
|
| 69 |
+
from ._core._typedattr import TypedAttributeSet as TypedAttributeSet
|
| 70 |
+
from ._core._typedattr import typed_attribute as typed_attribute
|
| 71 |
+
|
| 72 |
+
# Make every object imported above look like it lives directly in this package
# by pointing its ``__module__`` at this module's name. Only objects whose
# current ``__module__`` starts with "anyio." are touched. The namespace values
# are snapshotted first because the loop variable is itself added to the
# namespace while iterating.
for __value in tuple(locals().values()):
    if not getattr(__value, "__module__", "").startswith("anyio."):
        continue

    __value.__module__ = __name__

# Remove the loop variable so it does not linger in the package namespace.
del __value
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/from_thread.py
ADDED
|
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
from collections.abc import Awaitable, Callable, Generator
|
| 5 |
+
from concurrent.futures import Future
|
| 6 |
+
from contextlib import (
|
| 7 |
+
AbstractAsyncContextManager,
|
| 8 |
+
AbstractContextManager,
|
| 9 |
+
contextmanager,
|
| 10 |
+
)
|
| 11 |
+
from dataclasses import dataclass, field
|
| 12 |
+
from inspect import isawaitable
|
| 13 |
+
from threading import Lock, Thread, get_ident
|
| 14 |
+
from types import TracebackType
|
| 15 |
+
from typing import (
|
| 16 |
+
Any,
|
| 17 |
+
Generic,
|
| 18 |
+
TypeVar,
|
| 19 |
+
cast,
|
| 20 |
+
overload,
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
from ._core import _eventloop
|
| 24 |
+
from ._core._eventloop import get_async_backend, get_cancelled_exc_class, threadlocals
|
| 25 |
+
from ._core._synchronization import Event
|
| 26 |
+
from ._core._tasks import CancelScope, create_task_group
|
| 27 |
+
from .abc import AsyncBackend
|
| 28 |
+
from .abc._tasks import TaskStatus
|
| 29 |
+
|
| 30 |
+
if sys.version_info >= (3, 11):
|
| 31 |
+
from typing import TypeVarTuple, Unpack
|
| 32 |
+
else:
|
| 33 |
+
from typing_extensions import TypeVarTuple, Unpack
|
| 34 |
+
|
| 35 |
+
T_Retval = TypeVar("T_Retval")
|
| 36 |
+
T_co = TypeVar("T_co", covariant=True)
|
| 37 |
+
PosArgsT = TypeVarTuple("PosArgsT")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def run(
    func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]], *args: Unpack[PosArgsT]
) -> T_Retval:
    """
    Call a coroutine function from a worker thread.

    The backend and the event loop token are read from the thread-local state of the
    calling thread, and the call is delegated to the backend's
    ``run_async_from_thread()``.

    :param func: a coroutine function
    :param args: positional arguments for the callable
    :return: the return value of the coroutine function
    :raises RuntimeError: if the calling thread has no backend or token stored in its
        thread-local state

    """
    try:
        # Evaluated left to right, so a missing backend is detected first.
        backend, loop_token = (
            threadlocals.current_async_backend,
            threadlocals.current_token,
        )
    except AttributeError:
        message = "This function can only be run from an AnyIO worker thread"
        raise RuntimeError(message) from None

    return backend.run_async_from_thread(func, args, token=loop_token)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def run_sync(
    func: Callable[[Unpack[PosArgsT]], T_Retval], *args: Unpack[PosArgsT]
) -> T_Retval:
    """
    Call a function in the event loop thread from a worker thread.

    The backend and the event loop token are read from the thread-local state of the
    calling thread, and the call is delegated to the backend's
    ``run_sync_from_thread()``.

    :param func: a callable
    :param args: positional arguments for the callable
    :return: the return value of the callable
    :raises RuntimeError: if the calling thread has no backend or token stored in its
        thread-local state

    """
    try:
        # Evaluated left to right, so a missing backend is detected first.
        backend, loop_token = (
            threadlocals.current_async_backend,
            threadlocals.current_token,
        )
    except AttributeError:
        message = "This function can only be run from an AnyIO worker thread"
        raise RuntimeError(message) from None

    return backend.run_sync_from_thread(func, args, token=loop_token)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class _BlockingAsyncContextManager(Generic[T_co], AbstractContextManager):
    """
    Synchronous facade over an asynchronous context manager.

    ``__enter__`` starts :meth:`run_async_cm` as a task through the given portal and
    blocks until the async context manager has been entered. ``__exit__`` signals that
    task to call ``__aexit__()`` and blocks until the task has finished.
    """

    # Resolved by run_async_cm() with the value (or exception) of __aenter__()
    _enter_future: Future[T_co]
    # Future of the run_async_cm() task; its result is what __aexit__() returned
    _exit_future: Future[bool | None]
    # Set from __exit__() to let run_async_cm() proceed to __aexit__()
    _exit_event: Event
    # Exception info passed to __aexit__(); stays (None, None, None) until
    # __exit__() stores the real values
    _exit_exc_info: tuple[
        type[BaseException] | None, BaseException | None, TracebackType | None
    ] = (None, None, None)

    def __init__(
        self, async_cm: AbstractAsyncContextManager[T_co], portal: BlockingPortal
    ):
        """
        :param async_cm: the asynchronous context manager to wrap
        :param portal: the portal used to start the task and to signal the exit
        """
        self._async_cm = async_cm
        self._portal = portal

    async def run_async_cm(self) -> bool | None:
        """
        Enter the wrapped context manager, wait for the exit signal, then exit it.

        :return: the return value of the wrapped manager's ``__aexit__()``
        """
        try:
            # The event is created here, inside the task, rather than in __init__()
            self._exit_event = Event()
            value = await self._async_cm.__aenter__()
        except BaseException as exc:
            # Unblock the thread waiting in __enter__() with the failure, then let
            # the exception propagate out of this task as well
            self._enter_future.set_exception(exc)
            raise
        else:
            self._enter_future.set_result(value)

        try:
            # Wait for the sync context manager to exit.
            # This next statement can raise `get_cancelled_exc_class()` if
            # something went wrong in a task group in this async context
            # manager.
            await self._exit_event.wait()
        finally:
            # In case of cancellation, it could be that we end up here before
            # `_BlockingAsyncContextManager.__exit__` is called, and an
            # `_exit_exc_info` has been set.
            result = await self._async_cm.__aexit__(*self._exit_exc_info)
            # NOTE(review): returning from inside `finally` discards any exception
            # raised by the wait above (e.g. cancellation) -- confirm this is intended
            return result

    def __enter__(self) -> T_co:
        """
        Start the helper task and block until ``__aenter__()`` has completed.

        :return: the value returned by the wrapped manager's ``__aenter__()``
        """
        self._enter_future = Future()
        self._exit_future = self._portal.start_task_soon(self.run_async_cm)
        return self._enter_future.result()

    def __exit__(
        self,
        __exc_type: type[BaseException] | None,
        __exc_value: BaseException | None,
        __traceback: TracebackType | None,
    ) -> bool | None:
        """
        Hand the exception info to the helper task and block until it has finished.

        :return: the result of the helper task, i.e. what ``__aexit__()`` returned
        """
        # Store the exception info before setting the event so the task sees it
        self._exit_exc_info = __exc_type, __exc_value, __traceback
        self._portal.call(self._exit_event.set)
        return self._exit_future.result()
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
class _BlockingPortalTaskStatus(TaskStatus):
    """Task status object that reports ``started()`` through a future."""

    def __init__(self, future: Future):
        # Resolved with whatever value is passed to started()
        self._future = future

    def started(self, value: object = None) -> None:
        """
        Resolve the wrapped future with the given value.

        :param value: the value to set as the future's result
        """
        status_future = self._future
        status_future.set_result(value)
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
class BlockingPortal:
    """An object that lets external threads run code in an asynchronous event loop."""

    def __new__(cls) -> BlockingPortal:
        """Delegate instance creation to the current asynchronous backend."""
        return get_async_backend().create_blocking_portal()

    def __init__(self) -> None:
        """Capture the current thread id and create the portal's event and task group."""
        # The thread running __init__() is treated as the event loop thread; this is
        # reset to None by stop() to mark the portal as no longer running
        self._event_loop_thread_id: int | None = get_ident()
        self._stop_event = Event()
        self._task_group = create_task_group()
        self._cancelled_exc_class = get_cancelled_exc_class()

    async def __aenter__(self) -> BlockingPortal:
        """Enter the portal's task group and return the portal itself."""
        await self._task_group.__aenter__()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> bool | None:
        """
        Stop the portal (without cancelling remaining tasks) and exit its task group.

        :return: the return value of the task group's ``__aexit__()``
        """
        await self.stop()
        return await self._task_group.__aexit__(exc_type, exc_val, exc_tb)

    def _check_running(self) -> None:
        """
        Verify that the portal may be used from the calling thread.

        :raises RuntimeError: if the portal has been stopped, or if the caller is
            running in the event loop thread

        """
        if self._event_loop_thread_id is None:
            raise RuntimeError("This portal is not running")
        if self._event_loop_thread_id == get_ident():
            raise RuntimeError(
                "This method cannot be called from the event loop thread"
            )

    async def sleep_until_stopped(self) -> None:
        """Sleep until :meth:`stop` is called."""
        await self._stop_event.wait()

    async def stop(self, cancel_remaining: bool = False) -> None:
        """
        Signal the portal to shut down.

        This marks the portal as no longer accepting new calls and exits from
        :meth:`sleep_until_stopped`.

        :param cancel_remaining: ``True`` to cancel all the remaining tasks, ``False``
            to let them finish before returning

        """
        # Clearing the thread id makes _check_running() reject any further calls
        self._event_loop_thread_id = None
        self._stop_event.set()
        if cancel_remaining:
            self._task_group.cancel_scope.cancel()

    async def _call_func(
        self,
        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
        args: tuple[Unpack[PosArgsT]],
        kwargs: dict[str, Any],
        future: Future[T_Retval],
    ) -> None:
        """
        Call ``func`` and relay its outcome to ``future``.

        If the call returns an awaitable, it is awaited inside a cancel scope that is
        cancelled when the future gets cancelled.

        :param func: the callable to run
        :param args: positional arguments passed to ``func``
        :param kwargs: keyword arguments passed to ``func``
        :param future: the future that receives the result, the exception or the
            cancellation of the call

        """

        def callback(f: Future[T_Retval]) -> None:
            # Done-callback of the future: if it was cancelled while the portal is
            # still running, and this callback is not running in the event loop
            # thread, cancel the scope through the portal
            if f.cancelled() and self._event_loop_thread_id not in (
                None,
                get_ident(),
            ):
                self.call(scope.cancel)

        try:
            retval_or_awaitable = func(*args, **kwargs)
            if isawaitable(retval_or_awaitable):
                with CancelScope() as scope:
                    # The future may already have been cancelled before this task
                    # got to run; otherwise watch for a later cancellation
                    if future.cancelled():
                        scope.cancel()
                    else:
                        future.add_done_callback(callback)

                    retval = await retval_or_awaitable
            else:
                retval = retval_or_awaitable
        except self._cancelled_exc_class:
            # The task was cancelled: mark the future as cancelled too
            future.cancel()
            future.set_running_or_notify_cancel()
        except BaseException as exc:
            if not future.cancelled():
                future.set_exception(exc)

            # Let base exceptions fall through
            if not isinstance(exc, Exception):
                raise
        else:
            if not future.cancelled():
                future.set_result(retval)
        finally:
            # NOTE(review): presumably drops the reference that `callback` keeps to
            # the cancel scope through its closure -- confirm
            scope = None  # type: ignore[assignment]

    def _spawn_task_from_thread(
        self,
        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
        args: tuple[Unpack[PosArgsT]],
        kwargs: dict[str, Any],
        name: object,
        future: Future[T_Retval],
    ) -> None:
        """
        Spawn a new task using the given callable.

        Implementors must ensure that the future is resolved when the task finishes.

        :param func: a callable
        :param args: positional arguments to be passed to the callable
        :param kwargs: keyword arguments to be passed to the callable
        :param name: name of the task (will be coerced to a string if not ``None``)
        :param future: a future that will resolve to the return value of the callable,
            or the exception raised during its execution

        """
        # This base implementation always raises; see the docstring for what an
        # implementation must do
        raise NotImplementedError

    @overload
    def call(
        self,
        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]],
        *args: Unpack[PosArgsT],
    ) -> T_Retval: ...

    @overload
    def call(
        self, func: Callable[[Unpack[PosArgsT]], T_Retval], *args: Unpack[PosArgsT]
    ) -> T_Retval: ...

    def call(
        self,
        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
        *args: Unpack[PosArgsT],
    ) -> T_Retval:
        """
        Call the given function in the event loop thread.

        If the callable returns a coroutine object, it is awaited on.

        :param func: any callable
        :param args: positional arguments passed to ``func``
        :return: the result of the future returned by :meth:`start_task_soon`
        :raises RuntimeError: if the portal is not running or if this method is called
            from within the event loop thread

        """
        # Blocks the calling thread until the future has a result
        return cast(T_Retval, self.start_task_soon(func, *args).result())

    @overload
    def start_task_soon(
        self,
        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]],
        *args: Unpack[PosArgsT],
        name: object = None,
    ) -> Future[T_Retval]: ...

    @overload
    def start_task_soon(
        self,
        func: Callable[[Unpack[PosArgsT]], T_Retval],
        *args: Unpack[PosArgsT],
        name: object = None,
    ) -> Future[T_Retval]: ...

    def start_task_soon(
        self,
        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
        *args: Unpack[PosArgsT],
        name: object = None,
    ) -> Future[T_Retval]:
        """
        Start a task in the portal's task group.

        The task will be run inside a cancel scope which can be cancelled by cancelling
        the returned future.

        :param func: the target function
        :param args: positional arguments passed to ``func``
        :param name: name of the task (will be coerced to a string if not ``None``)
        :return: a future that resolves with the return value of the callable if the
            task completes successfully, or with the exception raised in the task
        :raises RuntimeError: if the portal is not running or if this method is called
            from within the event loop thread
        :rtype: concurrent.futures.Future[T_Retval]

        .. versionadded:: 3.0

        """
        self._check_running()
        f: Future[T_Retval] = Future()
        # No keyword arguments are passed to the callable here
        self._spawn_task_from_thread(func, args, {}, name, f)
        return f

    def start_task(
        self,
        func: Callable[..., Awaitable[T_Retval]],
        *args: object,
        name: object = None,
    ) -> tuple[Future[T_Retval], Any]:
        """
        Start a task in the portal's task group and wait until it signals for readiness.

        This method works the same way as :meth:`.abc.TaskGroup.start`.

        :param func: the target function
        :param args: positional arguments passed to ``func``
        :param name: name of the task (will be coerced to a string if not ``None``)
        :return: a tuple of (future, task_status_value) where the ``task_status_value``
            is the value passed to ``task_status.started()`` from within the target
            function
        :raises RuntimeError: if the portal is not running or if this method is called
            from within the event loop thread
        :rtype: tuple[concurrent.futures.Future[T_Retval], Any]

        .. versionadded:: 3.0

        """

        def task_done(future: Future[T_Retval]) -> None:
            # Done-callback of the task's future: if the task ended before
            # task_status.started() was called, propagate the outcome to the status
            # future so the thread blocked on it is released
            if not task_status_future.done():
                if future.cancelled():
                    task_status_future.cancel()
                elif future.exception():
                    task_status_future.set_exception(future.exception())
                else:
                    exc = RuntimeError(
                        "Task exited without calling task_status.started()"
                    )
                    task_status_future.set_exception(exc)

        self._check_running()
        task_status_future: Future = Future()
        task_status = _BlockingPortalTaskStatus(task_status_future)
        f: Future = Future()
        f.add_done_callback(task_done)
        # The task status object is handed to the callable as a keyword argument
        self._spawn_task_from_thread(func, args, {"task_status": task_status}, name, f)
        # Blocks until started() is called or the task has ended
        return f, task_status_future.result()

    def wrap_async_context_manager(
        self, cm: AbstractAsyncContextManager[T_co]
    ) -> AbstractContextManager[T_co]:
        """
        Wrap an async context manager as a synchronous context manager via this portal.

        Spawns a task that will call both ``__aenter__()`` and ``__aexit__()``, stopping
        in the middle until the synchronous context manager exits.

        :param cm: an asynchronous context manager
        :return: a synchronous context manager

        .. versionadded:: 2.1

        """
        return _BlockingAsyncContextManager(cm, self)
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
@dataclass
class BlockingPortalProvider:
    """
    A manager for a blocking portal. Used as a context manager. The first thread to
    enter this context manager causes a blocking portal to be started with the specific
    parameters, and the last thread to exit causes the portal to be shut down. Thus,
    there will be exactly one blocking portal running in this context as long as at
    least one thread has entered this context manager.

    If starting the portal fails, the exception propagates to the caller and the
    provider is left untouched, so a later attempt starts from a clean state.

    The parameters are the same as for :func:`~anyio.run`.

    :param backend: name of the backend
    :param backend_options: backend options

    .. versionadded:: 4.4
    """

    backend: str = "asyncio"
    backend_options: dict[str, Any] | None = None
    # Guards the lease counter and the portal attributes below
    _lock: Lock = field(init=False, default_factory=Lock)
    # Number of threads currently inside the context manager
    _leases: int = field(init=False, default=0)
    # The running portal; only set while at least one lease is held
    _portal: BlockingPortal = field(init=False)
    # Context manager that owns the running portal, or None when none is running
    _portal_cm: AbstractContextManager[BlockingPortal] | None = field(
        init=False, default=None
    )

    def __enter__(self) -> BlockingPortal:
        """
        Take a lease on the shared portal, starting it if it is not running yet.

        :return: the shared blocking portal
        """
        with self._lock:
            if self._portal_cm is None:
                # Publish the context manager and the portal only after the portal
                # has actually started. Storing the context manager first would
                # leave the provider with ``_portal_cm`` set but no ``_portal`` if
                # entering raised, breaking every subsequent ``__enter__()`` call.
                portal_cm = start_blocking_portal(self.backend, self.backend_options)
                portal = portal_cm.__enter__()
                self._portal_cm = portal_cm
                self._portal = portal

            self._leases += 1
            return self._portal

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """
        Release one lease and shut the portal down once the last lease is gone.

        The exception details are ignored; the portal is always exited without an
        exception.
        """
        portal_cm: AbstractContextManager[BlockingPortal] | None = None
        with self._lock:
            assert self._portal_cm
            assert self._leases > 0
            self._leases -= 1
            if not self._leases:
                portal_cm = self._portal_cm
                self._portal_cm = None
                del self._portal

        # Exit the portal outside the lock
        if portal_cm:
            portal_cm.__exit__(None, None, None)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
@contextmanager
def start_blocking_portal(
    backend: str = "asyncio", backend_options: dict[str, Any] | None = None
) -> Generator[BlockingPortal, Any, None]:
    """
    Start a new event loop in a new thread and run a blocking portal in its main task.

    On leaving the context, the portal is asked to stop (cancelling its remaining
    tasks if the block was left with an exception) and the thread is joined.

    The parameters are the same as for :func:`~anyio.run`.

    :param backend: name of the backend
    :param backend_options: backend options
    :return: a context manager that yields a blocking portal

    .. versionchanged:: 3.0
        Usage as a context manager is now required.

    """
    # Carries the portal (or the startup failure) from the new thread to this one
    portal_future: Future[BlockingPortal] = Future()

    async def run_portal() -> None:
        async with BlockingPortal() as running_portal:
            portal_future.set_result(running_portal)
            await running_portal.sleep_until_stopped()

    def run_blocking_portal() -> None:
        if not portal_future.set_running_or_notify_cancel():
            return

        try:
            _eventloop.run(
                run_portal, backend=backend, backend_options=backend_options
            )
        except BaseException as exc:
            # Only report the failure if the portal was never handed over
            if not portal_future.done():
                portal_future.set_exception(exc)

    loop_thread = Thread(target=run_blocking_portal, daemon=True)
    loop_thread.start()
    try:
        cancel_remaining_tasks = False
        portal = portal_future.result()
        try:
            yield portal
        except BaseException:
            cancel_remaining_tasks = True
            raise
        finally:
            # A RuntimeError from the stop request is deliberately ignored
            try:
                portal.call(portal.stop, cancel_remaining_tasks)
            except RuntimeError:
                pass
    finally:
        loop_thread.join()
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
def check_cancelled() -> None:
    """
    Check if the cancel scope of the host task's running the current worker thread has
    been cancelled.

    If the host task's current cancel scope has indeed been cancelled, the
    backend-specific cancellation exception will be raised.

    The backend is read from the calling thread's thread-local state and the check is
    delegated to its ``check_cancelled()``.

    :raises RuntimeError: if the current thread was not spawned by
        :func:`.to_thread.run_sync`

    """
    try:
        backend: AsyncBackend = threadlocals.current_async_backend
    except AttributeError:
        message = "This function can only be run from an AnyIO worker thread"
        raise RuntimeError(message) from None

    backend.check_cancelled()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/lowlevel.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import enum
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from typing import Any, Generic, Literal, TypeVar, overload
|
| 6 |
+
from weakref import WeakKeyDictionary
|
| 7 |
+
|
| 8 |
+
from ._core._eventloop import get_async_backend
|
| 9 |
+
|
| 10 |
+
T = TypeVar("T")
|
| 11 |
+
D = TypeVar("D")
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
async def checkpoint() -> None:
    """
    Check for cancellation and allow the scheduler to switch to another task.

    Delegates to the current backend's ``checkpoint()``.

    Equivalent to (but more efficient than)::

        await checkpoint_if_cancelled()
        await cancel_shielded_checkpoint()


    .. versionadded:: 3.0

    """
    backend = get_async_backend()
    await backend.checkpoint()
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
async def checkpoint_if_cancelled() -> None:
    """
    Enter a checkpoint if the enclosing cancel scope has been cancelled.

    This does not allow the scheduler to switch to a different task.

    Delegates to the current backend's ``checkpoint_if_cancelled()``.

    .. versionadded:: 3.0

    """
    backend = get_async_backend()
    await backend.checkpoint_if_cancelled()
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
async def cancel_shielded_checkpoint() -> None:
    """
    Allow the scheduler to switch to another task but without checking for cancellation.

    Delegates to the current backend's ``cancel_shielded_checkpoint()``.

    Equivalent to (but potentially more efficient than)::

        with CancelScope(shield=True):
            await checkpoint()


    .. versionadded:: 3.0

    """
    backend = get_async_backend()
    await backend.cancel_shielded_checkpoint()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def current_token() -> object:
    """
    Return a backend specific token object that can be used to get back to the event
    loop.

    The token is obtained from the current backend's ``current_token()``.

    """
    backend = get_async_backend()
    return backend.current_token()
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
_run_vars: WeakKeyDictionary[Any, dict[str, Any]] = WeakKeyDictionary()
|
| 68 |
+
_token_wrappers: dict[Any, _TokenWrapper] = {}
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@dataclass(frozen=True)
|
| 72 |
+
class _TokenWrapper:
|
| 73 |
+
__slots__ = "_token", "__weakref__"
|
| 74 |
+
_token: object
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class _NoValueSet(enum.Enum):
|
| 78 |
+
NO_VALUE_SET = enum.auto()
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class RunvarToken(Generic[T]):
    """
    Record of a run variable and the value it held, usable once to restore that value.
    """

    __slots__ = ("_var", "_value", "_redeemed")

    def __init__(self, var: RunVar[T], value: T | Literal[_NoValueSet.NO_VALUE_SET]):
        """
        :param var: the run variable this token belongs to
        :param value: the value to remember, or the "no value" sentinel
        """
        # A fresh token has not been used yet
        self._redeemed = False
        self._var = var
        self._value: T | Literal[_NoValueSet.NO_VALUE_SET] = value
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class RunVar(Generic[T]):
    """
    Like a :class:`~contextvars.ContextVar`, except scoped to the running event loop.

    Values are kept in a per-token mapping that is looked up with the token of the
    currently running event loop.
    """

    __slots__ = ("_name", "_default")

    NO_VALUE_SET: Literal[_NoValueSet.NO_VALUE_SET] = _NoValueSet.NO_VALUE_SET

    _token_wrappers: set[_TokenWrapper] = set()

    def __init__(
        self, name: str, default: T | Literal[_NoValueSet.NO_VALUE_SET] = NO_VALUE_SET
    ):
        """
        :param name: key under which the value is stored
        :param default: value returned by :meth:`get` when nothing has been set
        """
        self._default = default
        self._name = name

    @property
    def _current_vars(self) -> dict[str, T]:
        """Return the variable mapping for the current token, creating it on demand."""
        loop_token = current_token()
        try:
            values = _run_vars[loop_token]
        except KeyError:
            values = _run_vars[loop_token] = {}

        return values

    @overload
    def get(self, default: D) -> T | D: ...

    @overload
    def get(self) -> T: ...

    def get(
        self, default: D | Literal[_NoValueSet.NO_VALUE_SET] = NO_VALUE_SET
    ) -> T | D:
        """
        Return the current value of this variable.

        :param default: value to return if the variable has no value
        :return: the stored value, else ``default`` if given, else the default passed
            to the constructor
        :raises LookupError: if there is neither a stored value nor any default
        """
        try:
            return self._current_vars[self._name]
        except KeyError:
            pass

        # The call-site default takes precedence over the constructor default
        for fallback in (default, self._default):
            if fallback is not RunVar.NO_VALUE_SET:
                return fallback

        raise LookupError(
            f'Run variable "{self._name}" has no value and no default set'
        )

    def set(self, value: T) -> RunvarToken[T]:
        """
        Store a new value for this variable.

        :param value: the value to store
        :return: a token that can be passed to :meth:`reset` to restore the previous
            state
        """
        values = self._current_vars
        previous = values.get(self._name, RunVar.NO_VALUE_SET)
        restore_token = RunvarToken(self, previous)
        values[self._name] = value
        return restore_token

    def reset(self, token: RunvarToken[T]) -> None:
        """
        Restore the state recorded in the given token.

        :param token: a token previously returned by :meth:`set`
        :raises ValueError: if the token belongs to another variable or has already
            been used
        """
        if token._var is not self:
            raise ValueError("This token does not belong to this RunVar")

        if token._redeemed:
            raise ValueError("This token has already been used")

        previous = token._value
        if previous is _NoValueSet.NO_VALUE_SET:
            # Nothing was set before: remove the entry if it is still there
            self._current_vars.pop(self._name, None)
        else:
            self._current_vars[self._name] = previous

        token._redeemed = True

    def __repr__(self) -> str:
        return f"<RunVar name={self._name!r}>"
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/py.typed
ADDED
|
File without changes
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/pytest_plugin.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
from collections.abc import Generator, Iterator
|
| 5 |
+
from contextlib import ExitStack, contextmanager
|
| 6 |
+
from inspect import isasyncgenfunction, iscoroutinefunction, ismethod
|
| 7 |
+
from typing import Any, cast
|
| 8 |
+
|
| 9 |
+
import pytest
|
| 10 |
+
import sniffio
|
| 11 |
+
from _pytest.fixtures import SubRequest
|
| 12 |
+
from _pytest.outcomes import Exit
|
| 13 |
+
|
| 14 |
+
from ._core._eventloop import get_all_backends, get_async_backend
|
| 15 |
+
from ._core._exceptions import iterate_exceptions
|
| 16 |
+
from .abc import TestRunner
|
| 17 |
+
|
| 18 |
+
if sys.version_info < (3, 11):
|
| 19 |
+
from exceptiongroup import ExceptionGroup
|
| 20 |
+
|
| 21 |
+
# Shared state managed by get_runner():
# the test runner currently in use (None when no runner is active)
_current_runner: TestRunner | None = None
# exit stack that owns the runner's context and any related cleanup callbacks
_runner_stack: ExitStack | None = None
# number of get_runner() contexts currently open; the runner is closed at zero
_runner_leases = 0
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def extract_backend_and_options(backend: object) -> tuple[str, dict[str, Any]]:
    """
    Split an ``anyio_backend`` value into a backend name and its options.

    :param backend: either a backend name, or a ``(name, options)`` tuple
    :return: a tuple of (backend name, options dict); a bare name yields an empty
        options dict, a valid tuple is returned as is
    :raises TypeError: if ``backend`` has any other shape

    """
    if isinstance(backend, str):
        return backend, {}

    is_pair = isinstance(backend, tuple) and len(backend) == 2
    if is_pair and isinstance(backend[0], str) and isinstance(backend[1], dict):
        return cast(tuple[str, dict[str, Any]], backend)

    raise TypeError("anyio_backend must be either a string or tuple of (string, dict)")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@contextmanager
def get_runner(
    backend_name: str, backend_options: dict[str, Any]
) -> Iterator[TestRunner]:
    """
    Lease the shared test runner, creating it first if there is none.

    The runner is created on the first lease and closed when the last lease is
    released. While a runner exists, ``backend_name`` and ``backend_options`` are
    ignored and the existing runner is reused.

    :param backend_name: name of the backend to create the runner for
    :param backend_options: options passed to the backend's ``create_test_runner()``
    :return: a context manager yielding the shared test runner

    """
    global _current_runner, _runner_leases, _runner_stack
    if _current_runner is None:
        asynclib = get_async_backend(backend_name)
        # Build the cleanup stack inside a "with" block so that everything
        # registered so far is unwound if creating the runner fails; on success,
        # pop_all() hands the callbacks over to the long-lived module-level stack.
        with ExitStack() as setup_stack:
            if sniffio.current_async_library_cvar.get(None) is None:
                # Since we're in control of the event loop, we can cache the name
                # of the async library
                token = sniffio.current_async_library_cvar.set(backend_name)
                setup_stack.callback(sniffio.current_async_library_cvar.reset, token)

            backend_options = backend_options or {}
            _current_runner = setup_stack.enter_context(
                asynclib.create_test_runner(backend_options)
            )
            _runner_stack = setup_stack.pop_all()

    _runner_leases += 1
    try:
        yield _current_runner
    finally:
        _runner_leases -= 1
        if not _runner_leases:
            assert _runner_stack is not None
            try:
                _runner_stack.close()
            finally:
                # Forget the runner even if closing it failed, so that the next
                # lease creates a fresh one instead of reusing a dead runner
                _runner_stack = _current_runner = None
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def pytest_configure(config: Any) -> None:
    """Add the ``anyio`` marker description to the ``markers`` ini value."""
    marker_description = (
        "anyio: mark the (coroutine function) test to be run "
        "asynchronously via anyio."
    )
    config.addinivalue_line("markers", marker_description)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
@pytest.hookimpl(hookwrapper=True)
def pytest_fixture_setup(fixturedef: Any, request: Any) -> Generator[Any]:
    """
    Run coroutine and async generator fixtures through the anyio test runner.

    For such fixtures, in requests that involve the ``anyio_backend`` fixture, the
    fixture function is temporarily replaced with a synchronous wrapper for the
    duration of the wrapped hook call, and restored afterwards.
    """

    def wrapper(
        *args: Any, anyio_backend: Any, request: SubRequest, **kwargs: Any
    ) -> Any:
        # Rebind any fixture methods to the request instance
        if (
            request.instance
            and ismethod(func)
            and type(func.__self__) is type(request.instance)
        ):
            local_func = func.__func__.__get__(request.instance)
        else:
            local_func = func

        backend_name, backend_options = extract_backend_and_options(anyio_backend)
        # Only forward these two arguments if the original fixture declared them;
        # otherwise they were added below purely for the wrapper's own use
        if has_backend_arg:
            kwargs["anyio_backend"] = anyio_backend

        if has_request_arg:
            kwargs["request"] = request

        with get_runner(backend_name, backend_options) as runner:
            if isasyncgenfunction(local_func):
                yield from runner.run_asyncgen_fixture(local_func, kwargs)
            else:
                yield runner.run_fixture(local_func, kwargs)

    # Only apply this to coroutine functions and async generator functions in requests
    # that involve the anyio_backend fixture
    func = fixturedef.func
    if isasyncgenfunction(func) or iscoroutinefunction(func):
        if "anyio_backend" in request.fixturenames:
            fixturedef.func = wrapper
            original_argname = fixturedef.argnames

            # Make sure the wrapper receives both "anyio_backend" and "request",
            # remembering whether the original fixture asked for them itself
            if not (has_backend_arg := "anyio_backend" in fixturedef.argnames):
                fixturedef.argnames += ("anyio_backend",)

            if not (has_request_arg := "request" in fixturedef.argnames):
                fixturedef.argnames += ("request",)

            try:
                return (yield)
            finally:
                # Undo the temporary patching of the fixture definition
                fixturedef.func = func
                fixturedef.argnames = original_argname

    # Not an async fixture handled by anyio: let the hook call proceed untouched
    return (yield)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
@pytest.hookimpl(tryfirst=True)
def pytest_pycollect_makeitem(collector: Any, name: Any, obj: Any) -> None:
    """
    Attach the ``anyio_backend`` fixture to coroutine tests marked with ``anyio``.

    For objects with a ``hypothesis`` attribute, the inner test function is the one
    checked for being a coroutine function.
    """
    if not collector.istestfunction(obj, name):
        return

    inner_func = obj.hypothesis.inner_test if hasattr(obj, "hypothesis") else obj
    if not iscoroutinefunction(inner_func):
        return

    # The marker may come from the collector or from the test object itself
    closest_marker = collector.get_closest_marker("anyio")
    own_markers = getattr(obj, "pytestmark", ())
    if closest_marker or any(mark.name == "anyio" for mark in own_markers):
        pytest.mark.usefixtures("anyio_backend")(obj)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
@pytest.hookimpl(tryfirst=True)
def pytest_pyfunc_call(pyfuncitem: Any) -> bool | None:
    """
    Run coroutine test functions through the anyio test runner.

    :return: ``True`` if a coroutine test function was run here, ``None`` otherwise
        (no ``anyio_backend`` funcarg, a Hypothesis test, or a non-coroutine test)

    """

    def run_with_hypothesis(**kwargs: Any) -> None:
        with get_runner(backend_name, backend_options) as runner:
            runner.run_test(original_func, kwargs)

    backend = pyfuncitem.funcargs.get("anyio_backend")
    if not backend:
        return None

    backend_name, backend_options = extract_backend_and_options(backend)

    if hasattr(pyfuncitem.obj, "hypothesis"):
        # Wrap the inner test function unless it's already wrapped
        original_func = pyfuncitem.obj.hypothesis.inner_test
        already_wrapped = (
            original_func.__qualname__ == run_with_hypothesis.__qualname__
        )
        if not already_wrapped and iscoroutinefunction(original_func):
            pyfuncitem.obj.hypothesis.inner_test = run_with_hypothesis

        return None

    if not iscoroutinefunction(pyfuncitem.obj):
        return None

    funcargs = pyfuncitem.funcargs
    testargs = {arg: funcargs[arg] for arg in pyfuncitem._fixtureinfo.argnames}
    with get_runner(backend_name, backend_options) as runner:
        try:
            runner.run_test(pyfuncitem.obj, testargs)
        except ExceptionGroup as excgrp:
            # Surface the first exit/interrupt style exception found in the group
            # on its own; otherwise re-raise the group unchanged
            for exc in iterate_exceptions(excgrp):
                if isinstance(exc, (Exit, KeyboardInterrupt, SystemExit)):
                    raise exc from excgrp

            raise

    return True
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
@pytest.fixture(scope="module", params=get_all_backends())
def anyio_backend(request: Any) -> Any:
    """Module-scoped fixture parametrized over ``get_all_backends()``."""
    backend = request.param
    return backend
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
@pytest.fixture
def anyio_backend_name(anyio_backend: Any) -> str:
    """Return the backend name: the value itself if a string, else its first item."""
    return anyio_backend if isinstance(anyio_backend, str) else anyio_backend[0]
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
@pytest.fixture
def anyio_backend_options(anyio_backend: Any) -> dict[str, Any]:
    """Return the backend options: empty for a bare name, else the second item."""
    return {} if isinstance(anyio_backend, str) else anyio_backend[1]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_interpreter.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import atexit
|
| 4 |
+
import os
|
| 5 |
+
import pickle
|
| 6 |
+
import sys
|
| 7 |
+
from collections import deque
|
| 8 |
+
from collections.abc import Callable
|
| 9 |
+
from textwrap import dedent
|
| 10 |
+
from typing import Any, Final, TypeVar
|
| 11 |
+
|
| 12 |
+
from . import current_time, to_thread
|
| 13 |
+
from ._core._exceptions import BrokenWorkerIntepreter
|
| 14 |
+
from ._core._synchronization import CapacityLimiter
|
| 15 |
+
from .lowlevel import RunVar
|
| 16 |
+
|
| 17 |
+
if sys.version_info >= (3, 11):
|
| 18 |
+
from typing import TypeVarTuple, Unpack
|
| 19 |
+
else:
|
| 20 |
+
from typing_extensions import TypeVarTuple, Unpack
|
| 21 |
+
|
| 22 |
+
# Values passed to the _interpqueues calls made by Worker (and injected into each
# subinterpreter's __main__ namespace by Worker.initialize())
UNBOUND: Final = 2  # I have no clue how this works, but it was used in the stdlib
FMT_UNPICKLED: Final = 0
FMT_PICKLED: Final = 1
# Fallback size for the default limiter when os.cpu_count() returns a falsy value
DEFAULT_CPU_COUNT: Final = 8  # this is just an arbitrarily selected value
MAX_WORKER_IDLE_TIME = (
    30  # seconds a subinterpreter can be idle before becoming eligible for pruning
)

T_Retval = TypeVar("T_Retval")
PosArgsT = TypeVarTuple("PosArgsT")

# Pool of idle workers; run_sync() pops from the right and prunes from the left
_idle_workers = RunVar[deque["Worker"]]("_available_workers")
# Lazily created by current_default_interpreter_limiter()
_default_interpreter_limiter = RunVar[CapacityLimiter]("_default_interpreter_limiter")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class Worker:
    """
    Handle to one subinterpreter plus the queue used to exchange data with it.

    The interpreter and queue are created lazily on the first call. Relies on the
    private ``_interpreters`` and ``_interpqueues`` modules.
    """

    # Code executed inside the subinterpreter for every call: it takes the pickled
    # (func, args) payload from the queue, runs it, and puts back a
    # (retval, is_exception) tuple - pickled only if it cannot be shared directly.
    _run_func = compile(
        dedent("""
        import _interpqueues as queues
        import _interpreters as interpreters
        from pickle import loads, dumps, HIGHEST_PROTOCOL

        item = queues.get(queue_id)[0]
        try:
            func, args = loads(item)
            retval = func(*args)
        except BaseException as exc:
            is_exception = True
            retval = exc
        else:
            is_exception = False

        try:
            queues.put(queue_id, (retval, is_exception), FMT_UNPICKLED, UNBOUND)
        except interpreters.NotShareableError:
            retval = dumps(retval, HIGHEST_PROTOCOL)
            queues.put(queue_id, (retval, is_exception), FMT_PICKLED, UNBOUND)
        """),
        "<string>",
        "exec",
    )

    # Timestamp (from current_time()) of when the worker was last returned to the
    # idle pool; compared against MAX_WORKER_IDLE_TIME when pruning
    last_used: float = 0

    # Set to True by initialize() once the interpreter and queue have been created
    _initialized: bool = False
    _interpreter_id: int
    _queue_id: int

    def initialize(self) -> None:
        """Create the subinterpreter and its queue, and seed its ``__main__``."""
        import _interpqueues as queues
        import _interpreters as interpreters

        self._interpreter_id = interpreters.create()
        self._queue_id = queues.create(2, FMT_UNPICKLED, UNBOUND)  # type: ignore[call-arg]
        self._initialized = True
        # Make the names used by _run_func available inside the subinterpreter
        interpreters.set___main___attrs(
            self._interpreter_id,
            {
                "queue_id": self._queue_id,
                "FMT_PICKLED": FMT_PICKLED,
                "FMT_UNPICKLED": FMT_UNPICKLED,
                "UNBOUND": UNBOUND,
            },
        )

    def destroy(self) -> None:
        """Destroy the subinterpreter and its queue, if they were ever created."""
        import _interpqueues as queues
        import _interpreters as interpreters

        if self._initialized:
            interpreters.destroy(self._interpreter_id)
            queues.destroy(self._queue_id)

    def _call(
        self,
        func: Callable[..., T_Retval],
        args: tuple[Any, ...],
    ) -> tuple[Any, bool]:
        """
        Run ``func(*args)`` in the subinterpreter, blocking until it finishes.

        :param func: the callable to run (pickled together with ``args``)
        :param args: positional arguments for the callable
        :return: a tuple of (return value or raised exception, ``True`` if the
            first item is an exception raised by ``func``)
        :raises BrokenWorkerIntepreter: if executing the runner code itself failed

        """
        import _interpqueues as queues
        import _interpreters as interpreters

        if not self._initialized:
            self.initialize()

        payload = pickle.dumps((func, args), pickle.HIGHEST_PROTOCOL)
        queues.put(self._queue_id, payload, FMT_PICKLED, UNBOUND)  # type: ignore[call-arg]

        res: Any
        is_exception: bool
        # A truthy result from exec() is treated as a failure of the runner code
        if exc_info := interpreters.exec(self._interpreter_id, self._run_func):  # type: ignore[func-returns-value,arg-type]
            raise BrokenWorkerIntepreter(exc_info)

        (res, is_exception), fmt = queues.get(self._queue_id)[:2]
        if fmt == FMT_PICKLED:
            # The runner fell back to pickling the result
            res = pickle.loads(res)

        return res, is_exception

    async def call(
        self,
        func: Callable[..., T_Retval],
        args: tuple[Any, ...],
        limiter: CapacityLimiter,
    ) -> T_Retval:
        """
        Run ``func(*args)`` in the subinterpreter via a worker thread.

        :param func: the callable to run
        :param args: positional arguments for the callable
        :param limiter: capacity limiter passed on to ``to_thread.run_sync()``
        :return: the return value of ``func``
        :raises BaseException: whatever exception ``func`` raised

        """
        result, is_exception = await to_thread.run_sync(
            self._call,
            func,
            args,
            limiter=limiter,
        )
        if is_exception:
            raise result

        return result
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _stop_workers(workers: deque[Worker]) -> None:
    """
    Destroy every worker in the given pool and then empty the pool.

    :param workers: the pool of workers to shut down (cleared in place)

    """
    for idle_worker in workers:
        idle_worker.destroy()

    workers.clear()
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
async def run_sync(
    func: Callable[[Unpack[PosArgsT]], T_Retval],
    *args: Unpack[PosArgsT],
    limiter: CapacityLimiter | None = None,
) -> T_Retval:
    """
    Call the given function with the given arguments in a subinterpreter.

    Workers are taken from (and returned to) a pool of idle workers stored in a run
    variable. Workers that have been idle for more than ``MAX_WORKER_IDLE_TIME``
    seconds are destroyed after each call, and any workers still in the pool are
    destroyed at interpreter exit.

    .. warning:: This feature is **experimental**. The upstream interpreter API has not
        yet been finalized or thoroughly tested, so don't rely on this for anything
        mission critical.

    :param func: a callable
    :param args: positional arguments for the callable
    :param limiter: capacity limiter to use to limit the total amount of subinterpreters
        running (if omitted, the default limiter is used)
    :return: the result of the call
    :raises BrokenWorkerIntepreter: if there's an internal error in a subinterpreter
    :raises RuntimeError: if running on a Python version older than 3.13

    """
    if sys.version_info < (3, 13):
        raise RuntimeError("subinterpreters require at least Python 3.13")

    if limiter is None:
        limiter = current_default_interpreter_limiter()

    try:
        idle_workers = _idle_workers.get()
    except LookupError:
        # First call for this run variable scope: create the pool and make sure
        # its workers get destroyed at interpreter exit
        idle_workers = deque()
        _idle_workers.set(idle_workers)
        atexit.register(_stop_workers, idle_workers)

    async with limiter:
        # Reuse the most recently used idle worker, or start a new one
        try:
            worker = idle_workers.pop()
        except IndexError:
            worker = Worker()

        try:
            return await worker.call(func, args, limiter)
        finally:
            # Prune workers that have been idle for too long
            now = current_time()
            while idle_workers:
                if now - idle_workers[0].last_used <= MAX_WORKER_IDLE_TIME:
                    break

                await to_thread.run_sync(idle_workers.popleft().destroy, limiter=limiter)

            worker.last_used = current_time()
            idle_workers.append(worker)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def current_default_interpreter_limiter() -> CapacityLimiter:
    """
    Return the capacity limiter that is used by default to limit the number of
    concurrently running subinterpreters.

    The limiter is created on first use, sized to ``os.cpu_count()`` (or
    ``DEFAULT_CPU_COUNT`` if that is unavailable), and cached in a run variable.

    :return: a capacity limiter object

    """
    try:
        default_limiter = _default_interpreter_limiter.get()
    except LookupError:
        default_limiter = CapacityLimiter(os.cpu_count() or DEFAULT_CPU_COUNT)
        _default_interpreter_limiter.set(default_limiter)

    return default_limiter
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_process.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import pickle
|
| 5 |
+
import subprocess
|
| 6 |
+
import sys
|
| 7 |
+
from collections import deque
|
| 8 |
+
from collections.abc import Callable
|
| 9 |
+
from importlib.util import module_from_spec, spec_from_file_location
|
| 10 |
+
from typing import TypeVar, cast
|
| 11 |
+
|
| 12 |
+
from ._core._eventloop import current_time, get_async_backend, get_cancelled_exc_class
|
| 13 |
+
from ._core._exceptions import BrokenWorkerProcess
|
| 14 |
+
from ._core._subprocesses import open_process
|
| 15 |
+
from ._core._synchronization import CapacityLimiter
|
| 16 |
+
from ._core._tasks import CancelScope, fail_after
|
| 17 |
+
from .abc import ByteReceiveStream, ByteSendStream, Process
|
| 18 |
+
from .lowlevel import RunVar, checkpoint_if_cancelled
|
| 19 |
+
from .streams.buffered import BufferedByteReceiveStream
|
| 20 |
+
|
| 21 |
+
if sys.version_info >= (3, 11):
|
| 22 |
+
from typing import TypeVarTuple, Unpack
|
| 23 |
+
else:
|
| 24 |
+
from typing_extensions import TypeVarTuple, Unpack
|
| 25 |
+
|
| 26 |
+
# Idle workers at least this many seconds old are killed by run_sync()
WORKER_MAX_IDLE_TIME = 300  # 5 minutes

T_Retval = TypeVar("T_Retval")
PosArgsT = TypeVarTuple("PosArgsT")

# All live worker processes known to the pool
_process_pool_workers: RunVar[set[Process]] = RunVar("_process_pool_workers")
# Idle workers as (process, time it became idle), least recently used on the left
_process_pool_idle_workers: RunVar[deque[tuple[Process, float]]] = RunVar(
    "_process_pool_idle_workers"
)
# Lazily created by current_default_process_limiter()
_default_process_limiter: RunVar[CapacityLimiter] = RunVar("_default_process_limiter")
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
async def run_sync(  # type: ignore[return]
    func: Callable[[Unpack[PosArgsT]], T_Retval],
    *args: Unpack[PosArgsT],
    cancellable: bool = False,
    limiter: CapacityLimiter | None = None,
) -> T_Retval:
    """
    Call the given function with the given arguments in a worker process.

    If the ``cancellable`` option is enabled and the task waiting for its completion is
    cancelled, the worker process running it will be abruptly terminated using SIGKILL
    (or ``terminateProcess()`` on Windows).

    :param func: a callable
    :param args: positional arguments for the callable
    :param cancellable: ``True`` to allow cancellation of the operation while it's
        running
    :param limiter: capacity limiter to use to limit the total amount of processes
        running (if omitted, the default limiter is used)
    :return: an awaitable that yields the return value of the function.
    :raises BrokenWorkerProcess: if the worker sends an unexpected response or
        communication with it fails

    """

    async def send_raw_command(pickled_cmd: bytes) -> object:
        # Send one pickled command to the selected worker and return its unpickled
        # reply. Uses ``stdin``, ``buffered``, ``process`` and ``workers`` from the
        # enclosing scope, which are bound before this function is called.
        try:
            await stdin.send(pickled_cmd)
            # The reply starts with a "<STATUS> <LENGTH>\n" header line
            response = await buffered.receive_until(b"\n", 50)
            status, length = response.split(b" ")
            if status not in (b"RETURN", b"EXCEPTION"):
                raise RuntimeError(
                    f"Worker process returned unexpected response: {response!r}"
                )

            pickled_response = await buffered.receive_exactly(int(length))
        except BaseException as exc:
            # Any failure here (including cancellation) leaves the worker in an
            # unknown state, so remove it from the pool and kill it
            workers.discard(process)
            try:
                process.kill()
                with CancelScope(shield=True):
                    await process.aclose()
            except ProcessLookupError:
                pass

            if isinstance(exc, get_cancelled_exc_class()):
                raise
            else:
                raise BrokenWorkerProcess from exc

        retval = pickle.loads(pickled_response)
        if status == b"EXCEPTION":
            assert isinstance(retval, BaseException)
            raise retval
        else:
            return retval

    # First pickle the request before trying to reserve a worker process
    await checkpoint_if_cancelled()
    request = pickle.dumps(("run", func, args), protocol=pickle.HIGHEST_PROTOCOL)

    # If this is the first run in this event loop thread, set up the necessary variables
    try:
        workers = _process_pool_workers.get()
        idle_workers = _process_pool_idle_workers.get()
    except LookupError:
        workers = set()
        idle_workers = deque()
        _process_pool_workers.set(workers)
        _process_pool_idle_workers.set(idle_workers)
        get_async_backend().setup_process_pool_exit_at_shutdown(workers)

    async with limiter or current_default_process_limiter():
        # Pop processes from the pool (starting from the most recently used) until we
        # find one that hasn't exited yet
        process: Process
        while idle_workers:
            process, idle_since = idle_workers.pop()
            if process.returncode is None:
                stdin = cast(ByteSendStream, process.stdin)
                buffered = BufferedByteReceiveStream(
                    cast(ByteReceiveStream, process.stdout)
                )

                # Prune any other workers that have been idle for WORKER_MAX_IDLE_TIME
                # seconds or longer
                now = current_time()
                killed_processes: list[Process] = []
                while idle_workers:
                    if now - idle_workers[0][1] < WORKER_MAX_IDLE_TIME:
                        break

                    process_to_kill, idle_since = idle_workers.popleft()
                    process_to_kill.kill()
                    workers.remove(process_to_kill)
                    killed_processes.append(process_to_kill)

                # Close the killed processes inside a shielded cancel scope
                with CancelScope(shield=True):
                    for killed_process in killed_processes:
                        await killed_process.aclose()

                break

            # This idle worker has already exited; forget about it
            workers.remove(process)
        else:
            # No usable idle worker was found (the loop ended without "break"), so
            # spawn a new one by running this module as a script
            command = [sys.executable, "-u", "-m", __name__]
            process = await open_process(
                command, stdin=subprocess.PIPE, stdout=subprocess.PIPE
            )
            try:
                stdin = cast(ByteSendStream, process.stdin)
                buffered = BufferedByteReceiveStream(
                    cast(ByteReceiveStream, process.stdout)
                )
                # Wait (up to 20 seconds) for the worker's startup handshake
                with fail_after(20):
                    message = await buffered.receive(6)

                if message != b"READY\n":
                    raise BrokenWorkerProcess(
                        f"Worker process returned unexpected response: {message!r}"
                    )

                # Send this process's sys.path and main module path to the worker
                main_module_path = getattr(sys.modules["__main__"], "__file__", None)
                pickled = pickle.dumps(
                    ("init", sys.path, main_module_path),
                    protocol=pickle.HIGHEST_PROTOCOL,
                )
                await send_raw_command(pickled)
            except (BrokenWorkerProcess, get_cancelled_exc_class()):
                raise
            except BaseException as exc:
                process.kill()
                raise BrokenWorkerProcess(
                    "Error during worker process initialization"
                ) from exc

            workers.add(process)

        with CancelScope(shield=not cancellable):
            try:
                return cast(T_Retval, await send_raw_command(request))
            finally:
                # Return the worker to the idle pool unless send_raw_command()
                # removed it from ``workers`` after a failure
                if process in workers:
                    idle_workers.append((process, current_time()))
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def current_default_process_limiter() -> CapacityLimiter:
    """
    Return the capacity limiter that is used by default to limit the number of worker
    processes.

    The limiter is created on first use, sized to ``os.cpu_count()`` (or 2 if that
    is unavailable), and cached in a run variable.

    :return: a capacity limiter object

    """
    try:
        default_limiter = _default_process_limiter.get()
    except LookupError:
        default_limiter = CapacityLimiter(os.cpu_count() or 2)
        _default_process_limiter.set(default_limiter)

    return default_limiter
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def process_worker() -> None:
    """
    Main loop of a worker process.

    Writes ``READY`` to the original stdout, then repeatedly reads a pickled
    command tuple from the original stdin, executes it, and writes back a
    ``<STATUS> <LENGTH>`` header line followed by the pickled result or exception.
    Returns when stdin reaches end of file.
    """
    # Redirect standard streams to os.devnull so that user code won't interfere with the
    # parent-worker communication
    stdin = sys.stdin
    stdout = sys.stdout
    sys.stdin = open(os.devnull)
    sys.stdout = open(os.devnull, "w")

    stdout.buffer.write(b"READY\n")
    while True:
        retval = exception = None
        try:
            # NOTE: the payload is unpickled as is, so stdin must only ever carry
            # data from a trusted parent process
            command, *args = pickle.load(stdin.buffer)
        except EOFError:
            return
        except BaseException as exc:
            exception = exc
        else:
            if command == "run":
                func, args = args
                try:
                    retval = func(*args)
                except BaseException as exc:
                    exception = exc
            elif command == "init":
                main_module_path: str | None
                sys.path, main_module_path = args
                del sys.modules["__main__"]
                if main_module_path and os.path.isfile(main_module_path):
                    # Load the parent's main module but as __mp_main__ instead of
                    # __main__ (like multiprocessing does) to avoid infinite recursion
                    try:
                        spec = spec_from_file_location("__mp_main__", main_module_path)
                        if spec and spec.loader:
                            main = module_from_spec(spec)
                            spec.loader.exec_module(main)
                            sys.modules["__main__"] = main
                    except BaseException as exc:
                        exception = exc
        try:
            if exception is not None:
                status = b"EXCEPTION"
                pickled = pickle.dumps(exception, pickle.HIGHEST_PROTOCOL)
            else:
                status = b"RETURN"
                pickled = pickle.dumps(retval, pickle.HIGHEST_PROTOCOL)
        except BaseException as exc:
            # The result (or exception) could not be pickled; report that instead
            exception = exc
            status = b"EXCEPTION"
            pickled = pickle.dumps(exc, pickle.HIGHEST_PROTOCOL)

        # Header line first, then exactly len(pickled) bytes of payload
        stdout.buffer.write(b"%s %d\n" % (status, len(pickled)))
        stdout.buffer.write(pickled)

        # Respect SIGTERM
        if isinstance(exception, SystemExit):
            raise exception
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
# Entry point for worker processes: run_sync() starts this module as a script
# (``python -u -m <module name>``).
if __name__ == "__main__":
    process_worker()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_thread.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
from collections.abc import Callable
|
| 5 |
+
from typing import TypeVar
|
| 6 |
+
from warnings import warn
|
| 7 |
+
|
| 8 |
+
from ._core._eventloop import get_async_backend
|
| 9 |
+
from .abc import CapacityLimiter
|
| 10 |
+
|
| 11 |
+
if sys.version_info >= (3, 11):
|
| 12 |
+
from typing import TypeVarTuple, Unpack
|
| 13 |
+
else:
|
| 14 |
+
from typing_extensions import TypeVarTuple, Unpack
|
| 15 |
+
|
| 16 |
+
T_Retval = TypeVar("T_Retval")
|
| 17 |
+
PosArgsT = TypeVarTuple("PosArgsT")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
async def run_sync(
    func: Callable[[Unpack[PosArgsT]], T_Retval],
    *args: Unpack[PosArgsT],
    abandon_on_cancel: bool = False,
    cancellable: bool | None = None,
    limiter: CapacityLimiter | None = None,
) -> T_Retval:
    """
    Run ``func(*args)`` in a worker thread and return its result.

    The call is delegated to ``run_sync_in_worker_thread()`` of the backend
    returned by ``get_async_backend()``.

    If ``abandon_on_cancel`` is enabled and the task waiting for the call is
    cancelled, the thread will still run its course but its return value (or any
    raised exception) will be ignored.

    :param func: a callable
    :param args: positional arguments for the callable
    :param abandon_on_cancel: ``True`` to abandon the thread (leaving it to run
        unchecked on its own) if the host task is cancelled, ``False`` to ignore
        cancellations in the host task until the operation has completed in the
        worker thread
    :param cancellable: deprecated alias of ``abandon_on_cancel``; when it is not
        ``None`` it overrides ``abandon_on_cancel`` and a ``DeprecationWarning``
        is emitted
    :param limiter: capacity limiter to use to limit the total amount of threads
        running (if omitted, the default limiter is used)
    :return: the return value of the function

    """
    if cancellable is not None:
        # stacklevel=2 attributes the warning to the caller of run_sync().
        warn(
            "The `cancellable=` keyword argument to `anyio.to_thread.run_sync` is "
            "deprecated since AnyIO 4.1.0; use `abandon_on_cancel=` instead",
            DeprecationWarning,
            stacklevel=2,
        )
        abandon_on_cancel = cancellable

    backend = get_async_backend()
    return await backend.run_sync_in_worker_thread(
        func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
    )
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def current_default_thread_limiter() -> CapacityLimiter:
    """
    Fetch the default thread capacity limiter from the current async backend.

    This is the limiter used by default to limit the number of concurrent
    threads.

    :return: a capacity limiter object

    """
    backend = get_async_backend()
    return backend.current_default_thread_limiter()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/_compat.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from collections.abc import Mapping
|
| 4 |
+
import sys
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
# Keyword arguments to splat into ``dataclasses.dataclass(...)``.
# ``slots=True`` is only requested on Python 3.10 and newer; older
# interpreters get no extra arguments.
DATACLASS_KWARGS: Mapping[str, Any]
if sys.version_info < (3, 10):
    DATACLASS_KWARGS = {}
else:
    DATACLASS_KWARGS = {"slots": True}
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/parser_inline.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tokenizes paragraph content.
|
| 2 |
+
"""
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import TYPE_CHECKING, Callable
|
| 6 |
+
|
| 7 |
+
from . import rules_inline
|
| 8 |
+
from .ruler import Ruler
|
| 9 |
+
from .rules_inline.state_inline import StateInline
|
| 10 |
+
from .token import Token
|
| 11 |
+
from .utils import EnvType
|
| 12 |
+
|
| 13 |
+
if TYPE_CHECKING:
|
| 14 |
+
from markdown_it import MarkdownIt
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Parser rules
|
| 18 |
+
RuleFuncInlineType = Callable[[StateInline, bool], bool]
|
| 19 |
+
"""(state: StateInline, silent: bool) -> matched: bool)
|
| 20 |
+
|
| 21 |
+
`silent` disables token generation, useful for lookahead.
|
| 22 |
+
"""
|
| 23 |
+
_rules: list[tuple[str, RuleFuncInlineType]] = [
|
| 24 |
+
("text", rules_inline.text),
|
| 25 |
+
("linkify", rules_inline.linkify),
|
| 26 |
+
("newline", rules_inline.newline),
|
| 27 |
+
("escape", rules_inline.escape),
|
| 28 |
+
("backticks", rules_inline.backtick),
|
| 29 |
+
("strikethrough", rules_inline.strikethrough.tokenize),
|
| 30 |
+
("emphasis", rules_inline.emphasis.tokenize),
|
| 31 |
+
("link", rules_inline.link),
|
| 32 |
+
("image", rules_inline.image),
|
| 33 |
+
("autolink", rules_inline.autolink),
|
| 34 |
+
("html_inline", rules_inline.html_inline),
|
| 35 |
+
("entity", rules_inline.entity),
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
# Note: the `ruler2` ruleset was created specifically for emphasis/strikethrough
|
| 39 |
+
# post-processing and may be changed in the future.
|
| 40 |
+
#
|
| 41 |
+
# Don't use this for anything except pairs (plugins working with `balance_pairs`).
|
| 42 |
+
#
|
| 43 |
+
RuleFuncInline2Type = Callable[[StateInline], None]
|
| 44 |
+
_rules2: list[tuple[str, RuleFuncInline2Type]] = [
|
| 45 |
+
("balance_pairs", rules_inline.link_pairs),
|
| 46 |
+
("strikethrough", rules_inline.strikethrough.postProcess),
|
| 47 |
+
("emphasis", rules_inline.emphasis.postProcess),
|
| 48 |
+
# rules for pairs separate '**' into its own text tokens, which may be left unused,
|
| 49 |
+
# rule below merges unused segments back with the rest of the text
|
| 50 |
+
("fragments_join", rules_inline.fragments_join),
|
| 51 |
+
]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class ParserInline:
    """Inline-level parser driving two rule chains over a ``StateInline``.

    ``ruler`` holds the tokenizing rules, called as ``rule(state, silent)``.
    ``ruler2`` holds the post-processing rules, called as ``rule(state)``
    once tokenizing has finished.
    """

    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)

    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation mode.

        Nothing is returned: the outcome is the updated ``state.pos``. The
        resulting position is stored in ``state.cache`` under the starting
        position and reused when the same position is skipped again.

        :param state: inline parser state; ``pos``, ``level`` (temporarily)
            and ``cache`` are modified in place
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                #  Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # But ideally, we'd need a separate private state variable for this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting, just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly in the following case,
            #       when an amount of `[` is exactly equal to `maxNesting + 1`:
            #
            #       [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround when CM standard will allow nested links
            #       (we can replace it by preventing links from being parsed in
            #       validation mode)
            #
            state.pos = state.posMax

        if not ok:
            # No rule matched: step over a single character.
            state.pos += 1
        cache[pos] = state.pos

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for input range.

        Rules are tried in order at each position until one reports success.
        When none does (or the nesting limit is reached), the current
        character is appended to ``state.pending`` and the position advances
        by one. Any remaining pending text is flushed with
        ``state.pushPending()`` at the end.

        :param state: inline parser state, modified in place
        """
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Try all possible rules.
            # On success, rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return true

            # Reset on every iteration: if the nesting limit blocks the rules
            # below, a stale success flag from the previous iteration would
            # hit `continue` without advancing `state.pos` and loop forever.
            ok = False

            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process input string and push inline tokens into `tokens`

        Builds a ``StateInline``, tokenizes it, then runs every ``ruler2``
        post-processing rule on the state.

        :return: ``state.tokens`` after post-processing
        """
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/port.yaml
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- package: markdown-it/markdown-it
|
| 2 |
+
version: 13.0.1
|
| 3 |
+
commit: e843acc9edad115cbf8cf85e676443f01658be08
|
| 4 |
+
date: May 3, 2022
|
| 5 |
+
notes:
|
| 6 |
+
- Rename variables that use python built-in names, e.g.
|
| 7 |
+
- `max` -> `maximum`
|
| 8 |
+
- `len` -> `length`
|
| 9 |
+
- `str` -> `string`
|
| 10 |
+
- |
|
| 11 |
+
Convert JS `for` loops to `while` loops
|
| 12 |
+
this is generally the main difference between the codes,
|
| 13 |
+
because in python you can't do e.g. `for (i=1; i<x; i++) {}`
|
| 14 |
+
- |
|
| 15 |
+
`env` is a common Python dictionary, and so does not have attribute access to keys,
|
| 16 |
+
as with JavaScript dictionaries.
|
| 17 |
+
`options` have attribute access only to core markdownit configuration options
|
| 18 |
+
- |
|
| 19 |
+
`Token.attrs` is a dictionary, instead of a list of lists.
|
| 20 |
+
Upstream the list format is only used to guarantee order: https://github.com/markdown-it/markdown-it/issues/142,
|
| 21 |
+
but in Python 3.7+ order of dictionaries is guaranteed.
|
| 22 |
+
One should anyhow use the `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods
|
| 23 |
+
to manipulate `Token.attrs`, which have an identical signature to those upstream.
|
| 24 |
+
- Use python version of `charCodeAt`
|
| 25 |
+
- |
|
| 26 |
+
Use `str` units instead of `int`s to represent Unicode codepoints.
|
| 27 |
+
This provides a significant performance boost
|
| 28 |
+
- |
|
| 29 |
+
In markdown_it/rules_block/reference.py,
|
| 30 |
+
record line range in state.env["references"] and add state.env["duplicate_refs"]
|
| 31 |
+
This is to allow renderers to report on issues regarding references
|
| 32 |
+
- |
|
| 33 |
+
The `MarkdownIt.__init__` signature is slightly different for updating options,
|
| 34 |
+
since you must always specify the config first, e.g.
|
| 35 |
+
use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
|
| 36 |
+
- The default configuration preset for `MarkdownIt` is "commonmark" not "default"
|
| 37 |
+
- Allow custom renderer to be passed to `MarkdownIt`
|
| 38 |
+
- |
|
| 39 |
+
change render method signatures
|
| 40 |
+
`func(tokens, idx, options, env, slf)` to
|
| 41 |
+
`func(self, tokens, idx, options, env)`
|
| 42 |
+
- |
|
| 43 |
+
Extensions add render methods by format
|
| 44 |
+
`MarkdownIt.add_render_rule(name, function, fmt="html")`,
|
| 45 |
+
rather than `MarkdownIt.renderer.rules[name] = function`
|
| 46 |
+
and renderers should declare a class property `__output__ = "html"`.
|
| 47 |
+
This allows for extensibility to more than just HTML renderers
|
| 48 |
+
- inline tokens in tables are assigned a map (this is helpful for propagation to children)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/token.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from collections.abc import Callable, MutableMapping
|
| 4 |
+
import dataclasses as dc
|
| 5 |
+
from typing import Any, Literal
|
| 6 |
+
import warnings
|
| 7 |
+
|
| 8 |
+
from markdown_it._compat import DATACLASS_KWARGS
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def convert_attrs(value: Any) -> Any:
    """Normalise a value assigned to ``Token.attrs``.

    Any falsy value (such as ``None`` or an empty list) becomes a new empty
    dict, a non-empty list (``[[key, value], ...]``) is passed to ``dict()``,
    and every other value is returned unchanged.

    This improves compatibility with upstream markdown-it.
    """
    if value:
        return dict(value) if isinstance(value, list) else value
    return {}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dc.dataclass(**DATACLASS_KWARGS)
class Token:
    """Dataclass describing one token; see the per-field docstrings below."""

    type: str
    """Type of the token (string, e.g. "paragraph_open")"""

    tag: str
    """HTML tag name, e.g. 'p'"""

    nesting: Literal[-1, 0, 1]
    """Level change (number in {-1, 0, 1} set), where:
    -  `1` means the tag is opening
    -  `0` means the tag is self-closing
    - `-1` means the tag is closing
    """

    attrs: dict[str, str | int | float] = dc.field(default_factory=dict)
    """HTML attributes.
    Note this differs from the upstream "list of lists" format,
    although an instance can still be initialised with this format.
    """

    map: list[int] | None = None
    """Source map info. Format: `[ line_begin, line_end ]`"""

    level: int = 0
    """Nesting level, the same as `state.level`"""

    children: list[Token] | None = None
    """Array of child nodes (inline and img tokens)."""

    content: str = ""
    """Inner content, in the case of a self-closing tag (code, html, fence, etc.),"""

    markup: str = ""
    """'*' or '_' for emphasis, fence string for fence, etc."""

    info: str = ""
    """Additional information:
    - Info string for "fence" tokens
    - The value "auto" for autolink "link_open" and "link_close" tokens
    - The string value of the item marker for ordered-list "list_item_open" tokens
    """

    meta: dict[Any, Any] = dc.field(default_factory=dict)
    """A place for plugins to store any arbitrary data"""

    block: bool = False
    """True for block-level tokens, false for inline tokens.
    Used in renderer to calculate line breaks
    """

    hidden: bool = False
    """If true, ignore this element when rendering.
    Used for tight lists to hide paragraphs.
    """

    def __post_init__(self) -> None:
        # Accept ``None`` or the upstream "list of lists" form for ``attrs``.
        self.attrs = convert_attrs(self.attrs)

    def attrIndex(self, name: str) -> int:
        """Return the position of `name` among the attr keys, or -1 if absent.

        Emits a ``UserWarning`` on every call, since attrs are a dictionary.
        """
        warnings.warn(  # noqa: B028
            "Token.attrIndex should not be used, since Token.attrs is a dictionary",
            UserWarning,
        )
        if name not in self.attrs:
            return -1
        return list(self.attrs.keys()).index(name)

    def attrItems(self) -> list[tuple[str, str | int | float]]:
        """Get (key, value) list of attrs."""
        return list(self.attrs.items())

    def attrPush(self, attrData: tuple[str, str | int | float]) -> None:
        """Add `[ name, value ]` attribute to list. Init attrs if necessary."""
        name, value = attrData
        self.attrSet(name, value)

    def attrSet(self, name: str, value: str | int | float) -> None:
        """Set `name` attribute to `value`. Override old value if exists."""
        self.attrs[name] = value

    def attrGet(self, name: str) -> None | str | int | float:
        """Get the value of attribute `name`, or null if it does not exist."""
        return self.attrs.get(name, None)

    def attrJoin(self, name: str, value: str) -> None:
        """Join value to existing attribute via space.
        Or create new attribute if not exists.
        Useful to operate with token classes.

        :raises TypeError: if the attribute exists and its value is not a str
        """
        if name in self.attrs:
            current = self.attrs[name]
            if not isinstance(current, str):
                # Interpolate the actual attribute name (previously the
                # message contained the literal text 'name').
                raise TypeError(
                    f"existing attr {name!r} is not a str: {current}"
                )
            self.attrs[name] = f"{current} {value}"
        else:
            self.attrs[name] = value

    def copy(self, **changes: Any) -> Token:
        """Return a shallow copy of the instance."""
        return dc.replace(self, **changes)

    def as_dict(
        self,
        *,
        children: bool = True,
        as_upstream: bool = True,
        meta_serializer: Callable[[dict[Any, Any]], Any] | None = None,
        filter: Callable[[str, Any], bool] | None = None,
        dict_factory: Callable[..., MutableMapping[str, Any]] = dict,
    ) -> MutableMapping[str, Any]:
        """Return the token as a dictionary.

        :param children: Also convert children to dicts
        :param as_upstream: Ensure the output dictionary is equal to that created by markdown-it
            For example, attrs are converted to null or lists
        :param meta_serializer: hook for serializing ``Token.meta``
        :param filter: A callable whose return code determines whether an
            attribute or element is included (``True``) or dropped (``False``).
            Is called with the (key, value) pair.
        :param dict_factory: A callable to produce dictionaries from.
            For example, to produce ordered dictionaries instead of normal Python
            dictionaries, pass in ``collections.OrderedDict``.

        """
        mapping = dict_factory((f.name, getattr(self, f.name)) for f in dc.fields(self))
        if filter:
            mapping = dict_factory((k, v) for k, v in mapping.items() if filter(k, v))
        if as_upstream and "attrs" in mapping:
            # Upstream format: ``None`` when empty, else a list of [key, value].
            mapping["attrs"] = (
                None
                if not mapping["attrs"]
                else [[k, v] for k, v in mapping["attrs"].items()]
            )
        if meta_serializer and "meta" in mapping:
            mapping["meta"] = meta_serializer(mapping["meta"])
        if children and mapping.get("children", None):
            # Recurse with the same options so nested tokens are converted alike.
            mapping["children"] = [
                child.as_dict(
                    children=children,
                    filter=filter,
                    dict_factory=dict_factory,
                    as_upstream=as_upstream,
                    meta_serializer=meta_serializer,
                )
                for child in mapping["children"]
            ]
        return mapping

    @classmethod
    def from_dict(cls, dct: MutableMapping[str, Any]) -> Token:
        """Convert a dict to a Token."""
        token = cls(**dct)
        if token.children:
            # Children arrive as dicts; convert each one recursively.
            token.children = [cls.from_dict(c) for c in token.children]  # type: ignore[arg-type]
        return token
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/LICENSE
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
Apache License
|
| 3 |
+
Version 2.0, January 2004
|
| 4 |
+
http://www.apache.org/licenses/
|
| 5 |
+
|
| 6 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 7 |
+
|
| 8 |
+
1. Definitions.
|
| 9 |
+
|
| 10 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 11 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 12 |
+
|
| 13 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 14 |
+
the copyright owner that is granting the License.
|
| 15 |
+
|
| 16 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 17 |
+
other entities that control, are controlled by, or are under common
|
| 18 |
+
control with that entity. For the purposes of this definition,
|
| 19 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 20 |
+
direction or management of such entity, whether by contract or
|
| 21 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 22 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 23 |
+
|
| 24 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 25 |
+
exercising permissions granted by this License.
|
| 26 |
+
|
| 27 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 28 |
+
including but not limited to software source code, documentation
|
| 29 |
+
source, and configuration files.
|
| 30 |
+
|
| 31 |
+
"Object" form shall mean any form resulting from mechanical
|
| 32 |
+
transformation or translation of a Source form, including but
|
| 33 |
+
not limited to compiled object code, generated documentation,
|
| 34 |
+
and conversions to other media types.
|
| 35 |
+
|
| 36 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 37 |
+
Object form, made available under the License, as indicated by a
|
| 38 |
+
copyright notice that is included in or attached to the work
|
| 39 |
+
(an example is provided in the Appendix below).
|
| 40 |
+
|
| 41 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 42 |
+
form, that is based on (or derived from) the Work and for which the
|
| 43 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 44 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 45 |
+
of this License, Derivative Works shall not include works that remain
|
| 46 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 47 |
+
the Work and Derivative Works thereof.
|
| 48 |
+
|
| 49 |
+
"Contribution" shall mean any work of authorship, including
|
| 50 |
+
the original version of the Work and any modifications or additions
|
| 51 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 52 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 53 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 54 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 55 |
+
means any form of electronic, verbal, or written communication sent
|
| 56 |
+
to the Licensor or its representatives, including but not limited to
|
| 57 |
+
communication on electronic mailing lists, source code control systems,
|
| 58 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 59 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 60 |
+
excluding communication that is conspicuously marked or otherwise
|
| 61 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 62 |
+
|
| 63 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 64 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 65 |
+
subsequently incorporated within the Work.
|
| 66 |
+
|
| 67 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 68 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 69 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 70 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 71 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 72 |
+
Work and such Derivative Works in Source or Object form.
|
| 73 |
+
|
| 74 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 77 |
+
(except as stated in this section) patent license to make, have made,
|
| 78 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 79 |
+
where such license applies only to those patent claims licensable
|
| 80 |
+
by such Contributor that are necessarily infringed by their
|
| 81 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 82 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 83 |
+
institute patent litigation against any entity (including a
|
| 84 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 85 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 86 |
+
or contributory patent infringement, then any patent licenses
|
| 87 |
+
granted to You under this License for that Work shall terminate
|
| 88 |
+
as of the date such litigation is filed.
|
| 89 |
+
|
| 90 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 91 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 92 |
+
modifications, and in Source or Object form, provided that You
|
| 93 |
+
meet the following conditions:
|
| 94 |
+
|
| 95 |
+
(a) You must give any other recipients of the Work or
|
| 96 |
+
Derivative Works a copy of this License; and
|
| 97 |
+
|
| 98 |
+
(b) You must cause any modified files to carry prominent notices
|
| 99 |
+
stating that You changed the files; and
|
| 100 |
+
|
| 101 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 102 |
+
that You distribute, all copyright, patent, trademark, and
|
| 103 |
+
attribution notices from the Source form of the Work,
|
| 104 |
+
excluding those notices that do not pertain to any part of
|
| 105 |
+
the Derivative Works; and
|
| 106 |
+
|
| 107 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 108 |
+
distribution, then any Derivative Works that You distribute must
|
| 109 |
+
include a readable copy of the attribution notices contained
|
| 110 |
+
within such NOTICE file, excluding those notices that do not
|
| 111 |
+
pertain to any part of the Derivative Works, in at least one
|
| 112 |
+
of the following places: within a NOTICE text file distributed
|
| 113 |
+
as part of the Derivative Works; within the Source form or
|
| 114 |
+
documentation, if provided along with the Derivative Works; or,
|
| 115 |
+
within a display generated by the Derivative Works, if and
|
| 116 |
+
wherever such third-party notices normally appear. The contents
|
| 117 |
+
of the NOTICE file are for informational purposes only and
|
| 118 |
+
do not modify the License. You may add Your own attribution
|
| 119 |
+
notices within Derivative Works that You distribute, alongside
|
| 120 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 121 |
+
that such additional attribution notices cannot be construed
|
| 122 |
+
as modifying the License.
|
| 123 |
+
|
| 124 |
+
You may add Your own copyright statement to Your modifications and
|
| 125 |
+
may provide additional or different license terms and conditions
|
| 126 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 127 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 128 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 129 |
+
the conditions stated in this License.
|
| 130 |
+
|
| 131 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 132 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 133 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 134 |
+
this License, without any additional terms or conditions.
|
| 135 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 136 |
+
the terms of any separate license agreement you may have executed
|
| 137 |
+
with Licensor regarding such Contributions.
|
| 138 |
+
|
| 139 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 140 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 141 |
+
except as required for reasonable and customary use in describing the
|
| 142 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 143 |
+
|
| 144 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 145 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 146 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 147 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 148 |
+
implied, including, without limitation, any warranties or conditions
|
| 149 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 150 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 151 |
+
appropriateness of using or redistributing the Work and assume any
|
| 152 |
+
risks associated with Your exercise of permissions under this License.
|
| 153 |
+
|
| 154 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 155 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 156 |
+
unless required by applicable law (such as deliberate and grossly
|
| 157 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 158 |
+
liable to You for damages, including any direct, indirect, special,
|
| 159 |
+
incidental, or consequential damages of any character arising as a
|
| 160 |
+
result of this License or out of the use or inability to use the
|
| 161 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 162 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 163 |
+
other commercial damages or losses), even if such Contributor
|
| 164 |
+
has been advised of the possibility of such damages.
|
| 165 |
+
|
| 166 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 167 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 168 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 169 |
+
or other liability obligations and/or rights consistent with this
|
| 170 |
+
License. However, in accepting such obligations, You may act only
|
| 171 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 172 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 173 |
+
defend, and hold each Contributor harmless for any liability
|
| 174 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 175 |
+
of your accepting any such warranty or additional liability.
|
| 176 |
+
|
| 177 |
+
END OF TERMS AND CONDITIONS
|
| 178 |
+
|
| 179 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 180 |
+
|
| 181 |
+
To apply the Apache License to your work, attach the following
|
| 182 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 183 |
+
replaced with your own identifying information. (Don't include
|
| 184 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 185 |
+
comment syntax for the file format. We also recommend that a
|
| 186 |
+
file or class name and description of purpose be included on the
|
| 187 |
+
same "printed page" as the copyright notice for easier
|
| 188 |
+
identification within third-party archives.
|
| 189 |
+
|
| 190 |
+
Copyright [yyyy] [name of copyright owner]
|
| 191 |
+
|
| 192 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 193 |
+
you may not use this file except in compliance with the License.
|
| 194 |
+
You may obtain a copy of the License at
|
| 195 |
+
|
| 196 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 197 |
+
|
| 198 |
+
Unless required by applicable law or agreed to in writing, software
|
| 199 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 200 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 201 |
+
See the License for the specific language governing permissions and
|
| 202 |
+
limitations under the License.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/METADATA
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.2
|
| 2 |
+
Name: propcache
|
| 3 |
+
Version: 0.3.0
|
| 4 |
+
Summary: Accelerated property cache
|
| 5 |
+
Home-page: https://github.com/aio-libs/propcache
|
| 6 |
+
Author: Andrew Svetlov
|
| 7 |
+
Author-email: andrew.svetlov@gmail.com
|
| 8 |
+
Maintainer: aiohttp team <team@aiohttp.org>
|
| 9 |
+
Maintainer-email: team@aiohttp.org
|
| 10 |
+
License: Apache-2.0
|
| 11 |
+
Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org
|
| 12 |
+
Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org
|
| 13 |
+
Project-URL: CI: GitHub Workflows, https://github.com/aio-libs/propcache/actions?query=branch:master
|
| 14 |
+
Project-URL: Code of Conduct, https://github.com/aio-libs/.github/blob/master/CODE_OF_CONDUCT.md
|
| 15 |
+
Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/propcache
|
| 16 |
+
Project-URL: Docs: Changelog, https://propcache.readthedocs.io/en/latest/changes/
|
| 17 |
+
Project-URL: Docs: RTD, https://propcache.readthedocs.io
|
| 18 |
+
Project-URL: GitHub: issues, https://github.com/aio-libs/propcache/issues
|
| 19 |
+
Project-URL: GitHub: repo, https://github.com/aio-libs/propcache
|
| 20 |
+
Keywords: cython,cext,propcache
|
| 21 |
+
Classifier: Development Status :: 5 - Production/Stable
|
| 22 |
+
Classifier: Intended Audience :: Developers
|
| 23 |
+
Classifier: License :: OSI Approved :: Apache Software License
|
| 24 |
+
Classifier: Programming Language :: Cython
|
| 25 |
+
Classifier: Programming Language :: Python
|
| 26 |
+
Classifier: Programming Language :: Python :: 3
|
| 27 |
+
Classifier: Programming Language :: Python :: 3.9
|
| 28 |
+
Classifier: Programming Language :: Python :: 3.10
|
| 29 |
+
Classifier: Programming Language :: Python :: 3.11
|
| 30 |
+
Classifier: Programming Language :: Python :: 3.12
|
| 31 |
+
Classifier: Programming Language :: Python :: 3.13
|
| 32 |
+
Classifier: Topic :: Internet :: WWW/HTTP
|
| 33 |
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
| 34 |
+
Requires-Python: >=3.9
|
| 35 |
+
Description-Content-Type: text/x-rst
|
| 36 |
+
License-File: LICENSE
|
| 37 |
+
License-File: NOTICE
|
| 38 |
+
|
| 39 |
+
propcache
|
| 40 |
+
=========
|
| 41 |
+
|
| 42 |
+
The module provides a fast implementation of cached properties for Python 3.9+.
|
| 43 |
+
|
| 44 |
+
.. image:: https://github.com/aio-libs/propcache/actions/workflows/ci-cd.yml/badge.svg
|
| 45 |
+
:target: https://github.com/aio-libs/propcache/actions?query=workflow%3ACI
|
| 46 |
+
:align: right
|
| 47 |
+
|
| 48 |
+
.. image:: https://codecov.io/gh/aio-libs/propcache/branch/master/graph/badge.svg
|
| 49 |
+
:target: https://codecov.io/gh/aio-libs/propcache
|
| 50 |
+
|
| 51 |
+
.. image:: https://badge.fury.io/py/propcache.svg
|
| 52 |
+
:target: https://badge.fury.io/py/propcache
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
.. image:: https://readthedocs.org/projects/propcache/badge/?version=latest
|
| 56 |
+
:target: https://propcache.readthedocs.io
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
.. image:: https://img.shields.io/pypi/pyversions/propcache.svg
|
| 60 |
+
:target: https://pypi.python.org/pypi/propcache
|
| 61 |
+
|
| 62 |
+
.. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
|
| 63 |
+
:target: https://matrix.to/#/%23aio-libs:matrix.org
|
| 64 |
+
:alt: Matrix Room — #aio-libs:matrix.org
|
| 65 |
+
|
| 66 |
+
.. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
|
| 67 |
+
:target: https://matrix.to/#/%23aio-libs-space:matrix.org
|
| 68 |
+
:alt: Matrix Space — #aio-libs-space:matrix.org
|
| 69 |
+
|
| 70 |
+
Introduction
|
| 71 |
+
------------
|
| 72 |
+
|
| 73 |
+
The API is designed to be nearly identical to the built-in ``functools.cached_property`` class,
|
| 74 |
+
except for the additional ``under_cached_property`` class which uses ``self._cache``
|
| 75 |
+
instead of ``self.__dict__`` to store the cached values and prevents ``__set__`` from being called.
|
| 76 |
+
|
| 77 |
+
For full documentation please read https://propcache.readthedocs.io.
|
| 78 |
+
|
| 79 |
+
Installation
|
| 80 |
+
------------
|
| 81 |
+
|
| 82 |
+
::
|
| 83 |
+
|
| 84 |
+
$ pip install propcache
|
| 85 |
+
|
| 86 |
+
The library is Python 3 only!
|
| 87 |
+
|
| 88 |
+
PyPI contains binary wheels for Linux, Windows and MacOS. If you want to install
|
| 89 |
+
``propcache`` on another operating system where wheels are not provided,
|
| 90 |
+
the tarball will be used to compile the library from
|
| 91 |
+
the source code. It requires a C compiler and Python headers installed.
|
| 92 |
+
|
| 93 |
+
To skip the compilation you must explicitly opt-in by using a PEP 517
|
| 94 |
+
configuration setting ``pure-python``, or setting the ``PROPCACHE_NO_EXTENSIONS``
|
| 95 |
+
environment variable to a non-empty value, e.g.:
|
| 96 |
+
|
| 97 |
+
.. code-block:: console
|
| 98 |
+
|
| 99 |
+
$ pip install propcache --config-settings=pure-python=true
|
| 100 |
+
|
| 101 |
+
Please note that the pure-Python (uncompiled) version is much slower. However,
|
| 102 |
+
PyPy always uses a pure-Python implementation, and, as such, it is unaffected
|
| 103 |
+
by this variable.
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
API documentation
|
| 107 |
+
------------------
|
| 108 |
+
|
| 109 |
+
The documentation is located at https://propcache.readthedocs.io.
|
| 110 |
+
|
| 111 |
+
Source code
|
| 112 |
+
-----------
|
| 113 |
+
|
| 114 |
+
The project is hosted on GitHub_
|
| 115 |
+
|
| 116 |
+
Please file an issue on the `bug tracker
|
| 117 |
+
<https://github.com/aio-libs/propcache/issues>`_ if you have found a bug
|
| 118 |
+
or have some suggestion in order to improve the library.
|
| 119 |
+
|
| 120 |
+
Discussion list
|
| 121 |
+
---------------
|
| 122 |
+
|
| 123 |
+
*aio-libs* google group: https://groups.google.com/forum/#!forum/aio-libs
|
| 124 |
+
|
| 125 |
+
Feel free to post your questions and ideas here.
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
Authors and License
|
| 129 |
+
-------------------
|
| 130 |
+
|
| 131 |
+
The ``propcache`` package is derived from ``yarl`` which is written by Andrew Svetlov.
|
| 132 |
+
|
| 133 |
+
It's *Apache 2* licensed and freely available.
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
.. _GitHub: https://github.com/aio-libs/propcache
|
| 137 |
+
|
| 138 |
+
=========
|
| 139 |
+
Changelog
|
| 140 |
+
=========
|
| 141 |
+
|
| 142 |
+
..
|
| 143 |
+
You should *NOT* be adding new change log entries to this file, this
|
| 144 |
+
file is managed by towncrier. You *may* edit previous change logs to
|
| 145 |
+
fix problems like typo corrections or such.
|
| 146 |
+
To add a new change log entry, please see
|
| 147 |
+
https://pip.pypa.io/en/latest/development/#adding-a-news-entry
|
| 148 |
+
we named the news folder "changes".
|
| 149 |
+
|
| 150 |
+
WARNING: Don't drop the next directive!
|
| 151 |
+
|
| 152 |
+
.. towncrier release notes start
|
| 153 |
+
|
| 154 |
+
0.3.0
|
| 155 |
+
=====
|
| 156 |
+
|
| 157 |
+
*(2025-02-20)*
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
Features
|
| 161 |
+
--------
|
| 162 |
+
|
| 163 |
+
- Implemented support for the free-threaded build of CPython 3.13 -- by `@lysnikolaou <https://github.com/sponsors/lysnikolaou>`__.
|
| 164 |
+
|
| 165 |
+
*Related issues and pull requests on GitHub:*
|
| 166 |
+
`#84 <https://github.com/aio-libs/propcache/issues/84>`__.
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
Packaging updates and notes for downstreams
|
| 170 |
+
-------------------------------------------
|
| 171 |
+
|
| 172 |
+
- Started building wheels for the free-threaded build of CPython 3.13 -- by `@lysnikolaou <https://github.com/sponsors/lysnikolaou>`__.
|
| 173 |
+
|
| 174 |
+
*Related issues and pull requests on GitHub:*
|
| 175 |
+
`#84 <https://github.com/aio-libs/propcache/issues/84>`__.
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
Contributor-facing changes
|
| 179 |
+
--------------------------
|
| 180 |
+
|
| 181 |
+
- GitHub Actions CI/CD is now configured to manage caching pip-ecosystem
|
| 182 |
+
dependencies using `re-actors/cache-python-deps`_ -- an action by
|
| 183 |
+
`@webknjaz <https://github.com/sponsors/webknjaz>`__ that takes into account ABI stability and the exact
|
| 184 |
+
version of Python runtime.
|
| 185 |
+
|
| 186 |
+
.. _`re-actors/cache-python-deps`:
|
| 187 |
+
https://github.com/marketplace/actions/cache-python-deps
|
| 188 |
+
|
| 189 |
+
*Related issues and pull requests on GitHub:*
|
| 190 |
+
`#93 <https://github.com/aio-libs/propcache/issues/93>`__.
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
----
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
0.2.1
|
| 197 |
+
=====
|
| 198 |
+
|
| 199 |
+
*(2024-12-01)*
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
Bug fixes
|
| 203 |
+
---------
|
| 204 |
+
|
| 205 |
+
- Stopped implicitly allowing the use of Cython pre-release versions when
|
| 206 |
+
building the distribution package -- by `@ajsanchezsanz <https://github.com/sponsors/ajsanchezsanz>`__ and
|
| 207 |
+
`@markgreene74 <https://github.com/sponsors/markgreene74>`__.
|
| 208 |
+
|
| 209 |
+
*Related commits on GitHub:*
|
| 210 |
+
`64df0a6 <https://github.com/aio-libs/propcache/commit/64df0a6>`__.
|
| 211 |
+
|
| 212 |
+
- Fixed ``wrapped`` and ``func`` not being accessible in the Cython versions of ``propcache.api.cached_property`` and ``propcache.api.under_cached_property`` decorators -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
|
| 213 |
+
|
| 214 |
+
*Related issues and pull requests on GitHub:*
|
| 215 |
+
`#72 <https://github.com/aio-libs/propcache/issues/72>`__.
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
Removals and backward incompatible breaking changes
|
| 219 |
+
---------------------------------------------------
|
| 220 |
+
|
| 221 |
+
- Removed support for Python 3.8 as it has reached end of life -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
|
| 222 |
+
|
| 223 |
+
*Related issues and pull requests on GitHub:*
|
| 224 |
+
`#57 <https://github.com/aio-libs/propcache/issues/57>`__.
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
Packaging updates and notes for downstreams
|
| 228 |
+
-------------------------------------------
|
| 229 |
+
|
| 230 |
+
- Stopped implicitly allowing the use of Cython pre-release versions when
|
| 231 |
+
building the distribution package -- by `@ajsanchezsanz <https://github.com/sponsors/ajsanchezsanz>`__ and
|
| 232 |
+
`@markgreene74 <https://github.com/sponsors/markgreene74>`__.
|
| 233 |
+
|
| 234 |
+
*Related commits on GitHub:*
|
| 235 |
+
`64df0a6 <https://github.com/aio-libs/propcache/commit/64df0a6>`__.
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
----
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
0.2.0
|
| 242 |
+
=====
|
| 243 |
+
|
| 244 |
+
*(2024-10-07)*
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
Bug fixes
|
| 248 |
+
---------
|
| 249 |
+
|
| 250 |
+
- Fixed loading the C-extensions on Python 3.8 -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
|
| 251 |
+
|
| 252 |
+
*Related issues and pull requests on GitHub:*
|
| 253 |
+
`#26 <https://github.com/aio-libs/propcache/issues/26>`__.
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
Features
|
| 257 |
+
--------
|
| 258 |
+
|
| 259 |
+
- Improved typing for the ``propcache.api.under_cached_property`` decorator -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
|
| 260 |
+
|
| 261 |
+
*Related issues and pull requests on GitHub:*
|
| 262 |
+
`#38 <https://github.com/aio-libs/propcache/issues/38>`__.
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
Improved documentation
|
| 266 |
+
----------------------
|
| 267 |
+
|
| 268 |
+
- Added API documentation for the ``propcache.api.cached_property`` and ``propcache.api.under_cached_property`` decorators -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
|
| 269 |
+
|
| 270 |
+
*Related issues and pull requests on GitHub:*
|
| 271 |
+
`#16 <https://github.com/aio-libs/propcache/issues/16>`__.
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
Packaging updates and notes for downstreams
|
| 275 |
+
-------------------------------------------
|
| 276 |
+
|
| 277 |
+
- Moved ``propcache.api.under_cached_property`` and ``propcache.api.cached_property`` to `propcache.api` -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
|
| 278 |
+
|
| 279 |
+
Both decorators remain importable from the top-level package, however importing from `propcache.api` is now the recommended way to use them.
|
| 280 |
+
|
| 281 |
+
*Related issues and pull requests on GitHub:*
|
| 282 |
+
`#19 <https://github.com/aio-libs/propcache/issues/19>`__, `#24 <https://github.com/aio-libs/propcache/issues/24>`__, `#32 <https://github.com/aio-libs/propcache/issues/32>`__.
|
| 283 |
+
|
| 284 |
+
- Converted project to use a src layout -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
|
| 285 |
+
|
| 286 |
+
*Related issues and pull requests on GitHub:*
|
| 287 |
+
`#22 <https://github.com/aio-libs/propcache/issues/22>`__, `#29 <https://github.com/aio-libs/propcache/issues/29>`__, `#37 <https://github.com/aio-libs/propcache/issues/37>`__.
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
----
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
0.1.0
|
| 294 |
+
=====
|
| 295 |
+
|
| 296 |
+
*(2024-10-03)*
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
Features
|
| 300 |
+
--------
|
| 301 |
+
|
| 302 |
+
- Added ``armv7l`` wheels -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
|
| 303 |
+
|
| 304 |
+
*Related issues and pull requests on GitHub:*
|
| 305 |
+
`#5 <https://github.com/aio-libs/propcache/issues/5>`__.
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
----
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
0.0.0
|
| 312 |
+
=====
|
| 313 |
+
|
| 314 |
+
*(2024-10-02)*
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
- Initial release.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/NOTICE
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Copyright 2016-2021, Andrew Svetlov and aio-libs team
|
| 2 |
+
|
| 3 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
you may not use this file except in compliance with the License.
|
| 5 |
+
You may obtain a copy of the License at
|
| 6 |
+
|
| 7 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
|
| 9 |
+
Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
See the License for the specific language governing permissions and
|
| 13 |
+
limitations under the License.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/RECORD
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
propcache-0.3.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
propcache-0.3.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
| 3 |
+
propcache-0.3.0.dist-info/METADATA,sha256=EVZE30m1bHCrcoZ2RLtvfNkdXyHIInAIG39SNZEjLyE,10357
|
| 4 |
+
propcache-0.3.0.dist-info/NOTICE,sha256=VtasbIEFwKUTBMIdsGDjYa-ajqCvmnXCOcKLXRNpODg,609
|
| 5 |
+
propcache-0.3.0.dist-info/RECORD,,
|
| 6 |
+
propcache-0.3.0.dist-info/WHEEL,sha256=siqMuoWpRueIZ87ijidBxnOwHeSOOcxNwYCs-pC4Yv0,151
|
| 7 |
+
propcache-0.3.0.dist-info/top_level.txt,sha256=pVF_GbqSAITPMiX27kfU3QP9-ufhRvkADmudDxWdF3w,10
|
| 8 |
+
propcache/__init__.py,sha256=82yOKjJMHwsj2IpsIfiuDumvBEOckaz2HB823YDJH4Y,965
|
| 9 |
+
propcache/__pycache__/__init__.cpython-312.pyc,,
|
| 10 |
+
propcache/__pycache__/_helpers.cpython-312.pyc,,
|
| 11 |
+
propcache/__pycache__/_helpers_py.cpython-312.pyc,,
|
| 12 |
+
propcache/__pycache__/api.cpython-312.pyc,,
|
| 13 |
+
propcache/_helpers.py,sha256=8CnlWmfTM6RDbMvNDXwL-VMHWiwIUjG8nbeqmvRsbh8,1579
|
| 14 |
+
propcache/_helpers_c.cpython-312-x86_64-linux-gnu.so,sha256=mqnzbvcdqj2KOhx9xGMMjatXLlEuB69xfnsCF5O9G8I,844512
|
| 15 |
+
propcache/_helpers_c.pyx,sha256=9UqfhVrbbkiZDGtEPFEOfT7qghPjAkNtJpgI1JYUPao,2518
|
| 16 |
+
propcache/_helpers_py.py,sha256=jnK6W43iETLcW-A1WMroGUKnElzX8Drw2UQfbEqLlI8,1637
|
| 17 |
+
propcache/api.py,sha256=wvgB-ypkkI5uf72VVYl2NFGc_TnzUQA2CxC7dTlL5ak,179
|
| 18 |
+
propcache/py.typed,sha256=ay5OMO475PlcZ_Fbun9maHW7Y6MBTk0UXL4ztHx3Iug,14
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: setuptools (75.8.0)
|
| 3 |
+
Root-Is-Purelib: false
|
| 4 |
+
Tag: cp312-cp312-manylinux_2_17_x86_64
|
| 5 |
+
Tag: cp312-cp312-manylinux2014_x86_64
|
| 6 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
propcache
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/__init__.py
ADDED
|
@@ -0,0 +1,437 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# flake8: noqa
|
| 19 |
+
|
| 20 |
+
"""
|
| 21 |
+
PyArrow is the python implementation of Apache Arrow.
|
| 22 |
+
|
| 23 |
+
Apache Arrow is a cross-language development platform for in-memory data.
|
| 24 |
+
It specifies a standardized language-independent columnar memory format for
|
| 25 |
+
flat and hierarchical data, organized for efficient analytic operations on
|
| 26 |
+
modern hardware. It also provides computational libraries and zero-copy
|
| 27 |
+
streaming messaging and interprocess communication.
|
| 28 |
+
|
| 29 |
+
For more information see the official page at https://arrow.apache.org
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
import gc as _gc
|
| 33 |
+
import importlib as _importlib
|
| 34 |
+
import os as _os
|
| 35 |
+
import platform as _platform
|
| 36 |
+
import sys as _sys
|
| 37 |
+
import warnings as _warnings
|
| 38 |
+
|
| 39 |
+
try:
|
| 40 |
+
from ._generated_version import version as __version__
|
| 41 |
+
except ImportError:
|
| 42 |
+
# Package is not installed, parse git tag at runtime
|
| 43 |
+
try:
|
| 44 |
+
import setuptools_scm
|
| 45 |
+
# Code duplicated from setup.py to avoid a dependency on each other
|
| 46 |
+
|
| 47 |
+
def parse_git(root, **kwargs):
|
| 48 |
+
"""
|
| 49 |
+
Parse function for setuptools_scm that ignores tags for non-C++
|
| 50 |
+
subprojects, e.g. apache-arrow-js-XXX tags.
|
| 51 |
+
"""
|
| 52 |
+
from setuptools_scm.git import parse
|
| 53 |
+
kwargs['describe_command'] = \
|
| 54 |
+
"git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'"
|
| 55 |
+
return parse(root, **kwargs)
|
| 56 |
+
__version__ = setuptools_scm.get_version('../',
|
| 57 |
+
parse=parse_git)
|
| 58 |
+
except ImportError:
|
| 59 |
+
__version__ = None
|
| 60 |
+
|
| 61 |
+
# ARROW-8684: Disable GC while initializing Cython extension module,
|
| 62 |
+
# to workaround Cython bug in https://github.com/cython/cython/issues/3603
|
| 63 |
+
_gc_enabled = _gc.isenabled()
|
| 64 |
+
_gc.disable()
|
| 65 |
+
import pyarrow.lib as _lib
|
| 66 |
+
if _gc_enabled:
|
| 67 |
+
_gc.enable()
|
| 68 |
+
|
| 69 |
+
from pyarrow.lib import (BuildInfo, RuntimeInfo, set_timezone_db_path,
|
| 70 |
+
MonthDayNano, VersionInfo, cpp_build_info,
|
| 71 |
+
cpp_version, cpp_version_info, runtime_info,
|
| 72 |
+
cpu_count, set_cpu_count, enable_signal_handlers,
|
| 73 |
+
io_thread_count, set_io_thread_count)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def show_versions():
|
| 77 |
+
"""
|
| 78 |
+
Print various version information, to help with error reporting.
|
| 79 |
+
"""
|
| 80 |
+
def print_entry(label, value):
|
| 81 |
+
print(f"{label: <26}: {value: <8}")
|
| 82 |
+
|
| 83 |
+
print("pyarrow version info\n--------------------")
|
| 84 |
+
print_entry("Package kind", cpp_build_info.package_kind
|
| 85 |
+
if len(cpp_build_info.package_kind) > 0
|
| 86 |
+
else "not indicated")
|
| 87 |
+
print_entry("Arrow C++ library version", cpp_build_info.version)
|
| 88 |
+
print_entry("Arrow C++ compiler",
|
| 89 |
+
f"{cpp_build_info.compiler_id} {cpp_build_info.compiler_version}")
|
| 90 |
+
print_entry("Arrow C++ compiler flags", cpp_build_info.compiler_flags)
|
| 91 |
+
print_entry("Arrow C++ git revision", cpp_build_info.git_id)
|
| 92 |
+
print_entry("Arrow C++ git description", cpp_build_info.git_description)
|
| 93 |
+
print_entry("Arrow C++ build type", cpp_build_info.build_type)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def _module_is_available(module):
|
| 97 |
+
try:
|
| 98 |
+
_importlib.import_module(f'pyarrow.{module}')
|
| 99 |
+
except ImportError:
|
| 100 |
+
return False
|
| 101 |
+
else:
|
| 102 |
+
return True
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _filesystem_is_available(fs):
|
| 106 |
+
try:
|
| 107 |
+
import pyarrow.fs
|
| 108 |
+
except ImportError:
|
| 109 |
+
return False
|
| 110 |
+
|
| 111 |
+
try:
|
| 112 |
+
getattr(pyarrow.fs, fs)
|
| 113 |
+
except (ImportError, AttributeError):
|
| 114 |
+
return False
|
| 115 |
+
else:
|
| 116 |
+
return True
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def show_info():
|
| 120 |
+
"""
|
| 121 |
+
Print detailed version and platform information, for error reporting
|
| 122 |
+
"""
|
| 123 |
+
show_versions()
|
| 124 |
+
|
| 125 |
+
def print_entry(label, value):
|
| 126 |
+
print(f" {label: <20}: {value: <8}")
|
| 127 |
+
|
| 128 |
+
print("\nPlatform:")
|
| 129 |
+
print_entry("OS / Arch", f"{_platform.system()} {_platform.machine()}")
|
| 130 |
+
print_entry("SIMD Level", runtime_info().simd_level)
|
| 131 |
+
print_entry("Detected SIMD Level", runtime_info().detected_simd_level)
|
| 132 |
+
|
| 133 |
+
pool = default_memory_pool()
|
| 134 |
+
print("\nMemory:")
|
| 135 |
+
print_entry("Default backend", pool.backend_name)
|
| 136 |
+
print_entry("Bytes allocated", f"{pool.bytes_allocated()} bytes")
|
| 137 |
+
print_entry("Max memory", f"{pool.max_memory()} bytes")
|
| 138 |
+
print_entry("Supported Backends", ', '.join(supported_memory_backends()))
|
| 139 |
+
|
| 140 |
+
print("\nOptional modules:")
|
| 141 |
+
modules = ["csv", "cuda", "dataset", "feather", "flight", "fs", "gandiva", "json",
|
| 142 |
+
"orc", "parquet"]
|
| 143 |
+
for module in modules:
|
| 144 |
+
status = "Enabled" if _module_is_available(module) else "-"
|
| 145 |
+
print(f" {module: <20}: {status: <8}")
|
| 146 |
+
|
| 147 |
+
print("\nFilesystems:")
|
| 148 |
+
filesystems = ["AzureFileSystem", "GcsFileSystem",
|
| 149 |
+
"HadoopFileSystem", "S3FileSystem"]
|
| 150 |
+
for fs in filesystems:
|
| 151 |
+
status = "Enabled" if _filesystem_is_available(fs) else "-"
|
| 152 |
+
print(f" {fs: <20}: {status: <8}")
|
| 153 |
+
|
| 154 |
+
print("\nCompression Codecs:")
|
| 155 |
+
codecs = ["brotli", "bz2", "gzip", "lz4_frame", "lz4", "snappy", "zstd"]
|
| 156 |
+
for codec in codecs:
|
| 157 |
+
status = "Enabled" if Codec.is_available(codec) else "-"
|
| 158 |
+
print(f" {codec: <20}: {status: <8}")
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
from pyarrow.lib import (null, bool_,
|
| 162 |
+
int8, int16, int32, int64,
|
| 163 |
+
uint8, uint16, uint32, uint64,
|
| 164 |
+
time32, time64, timestamp, date32, date64, duration,
|
| 165 |
+
month_day_nano_interval,
|
| 166 |
+
float16, float32, float64,
|
| 167 |
+
binary, string, utf8, binary_view, string_view,
|
| 168 |
+
large_binary, large_string, large_utf8,
|
| 169 |
+
decimal32, decimal64, decimal128, decimal256,
|
| 170 |
+
list_, large_list, list_view, large_list_view,
|
| 171 |
+
map_, struct,
|
| 172 |
+
union, sparse_union, dense_union,
|
| 173 |
+
dictionary,
|
| 174 |
+
run_end_encoded,
|
| 175 |
+
bool8, fixed_shape_tensor, json_, opaque, uuid,
|
| 176 |
+
field,
|
| 177 |
+
type_for_alias,
|
| 178 |
+
DataType, DictionaryType, StructType,
|
| 179 |
+
ListType, LargeListType, FixedSizeListType,
|
| 180 |
+
ListViewType, LargeListViewType,
|
| 181 |
+
MapType, UnionType, SparseUnionType, DenseUnionType,
|
| 182 |
+
TimestampType, Time32Type, Time64Type, DurationType,
|
| 183 |
+
FixedSizeBinaryType,
|
| 184 |
+
Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type,
|
| 185 |
+
BaseExtensionType, ExtensionType,
|
| 186 |
+
RunEndEncodedType, Bool8Type, FixedShapeTensorType,
|
| 187 |
+
JsonType, OpaqueType, UuidType,
|
| 188 |
+
PyExtensionType, UnknownExtensionType,
|
| 189 |
+
register_extension_type, unregister_extension_type,
|
| 190 |
+
DictionaryMemo,
|
| 191 |
+
KeyValueMetadata,
|
| 192 |
+
Field,
|
| 193 |
+
Schema,
|
| 194 |
+
schema,
|
| 195 |
+
unify_schemas,
|
| 196 |
+
Array, Tensor,
|
| 197 |
+
array, chunked_array, record_batch, nulls, repeat,
|
| 198 |
+
SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
|
| 199 |
+
SparseCSFTensor,
|
| 200 |
+
infer_type, from_numpy_dtype,
|
| 201 |
+
NullArray,
|
| 202 |
+
NumericArray, IntegerArray, FloatingPointArray,
|
| 203 |
+
BooleanArray,
|
| 204 |
+
Int8Array, UInt8Array,
|
| 205 |
+
Int16Array, UInt16Array,
|
| 206 |
+
Int32Array, UInt32Array,
|
| 207 |
+
Int64Array, UInt64Array,
|
| 208 |
+
HalfFloatArray, FloatArray, DoubleArray,
|
| 209 |
+
ListArray, LargeListArray, FixedSizeListArray,
|
| 210 |
+
ListViewArray, LargeListViewArray,
|
| 211 |
+
MapArray, UnionArray,
|
| 212 |
+
BinaryArray, StringArray,
|
| 213 |
+
LargeBinaryArray, LargeStringArray,
|
| 214 |
+
BinaryViewArray, StringViewArray,
|
| 215 |
+
FixedSizeBinaryArray,
|
| 216 |
+
DictionaryArray,
|
| 217 |
+
Date32Array, Date64Array, TimestampArray,
|
| 218 |
+
Time32Array, Time64Array, DurationArray,
|
| 219 |
+
MonthDayNanoIntervalArray,
|
| 220 |
+
Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
|
| 221 |
+
StructArray, ExtensionArray,
|
| 222 |
+
RunEndEncodedArray, Bool8Array, FixedShapeTensorArray,
|
| 223 |
+
JsonArray, OpaqueArray, UuidArray,
|
| 224 |
+
scalar, NA, _NULL as NULL, Scalar,
|
| 225 |
+
NullScalar, BooleanScalar,
|
| 226 |
+
Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
|
| 227 |
+
UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
|
| 228 |
+
HalfFloatScalar, FloatScalar, DoubleScalar,
|
| 229 |
+
Decimal32Scalar, Decimal64Scalar, Decimal128Scalar, Decimal256Scalar,
|
| 230 |
+
ListScalar, LargeListScalar, FixedSizeListScalar,
|
| 231 |
+
ListViewScalar, LargeListViewScalar,
|
| 232 |
+
Date32Scalar, Date64Scalar,
|
| 233 |
+
Time32Scalar, Time64Scalar,
|
| 234 |
+
TimestampScalar, DurationScalar,
|
| 235 |
+
MonthDayNanoIntervalScalar,
|
| 236 |
+
BinaryScalar, LargeBinaryScalar, BinaryViewScalar,
|
| 237 |
+
StringScalar, LargeStringScalar, StringViewScalar,
|
| 238 |
+
FixedSizeBinaryScalar, DictionaryScalar,
|
| 239 |
+
MapScalar, StructScalar, UnionScalar,
|
| 240 |
+
RunEndEncodedScalar, Bool8Scalar, ExtensionScalar,
|
| 241 |
+
FixedShapeTensorScalar, JsonScalar, OpaqueScalar, UuidScalar)
|
| 242 |
+
|
| 243 |
+
# Buffers, allocation
|
| 244 |
+
from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
|
| 245 |
+
default_cpu_memory_manager)
|
| 246 |
+
|
| 247 |
+
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
|
| 248 |
+
Codec, compress, decompress, allocate_buffer)
|
| 249 |
+
|
| 250 |
+
from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
|
| 251 |
+
total_allocated_bytes, set_memory_pool,
|
| 252 |
+
default_memory_pool, system_memory_pool,
|
| 253 |
+
jemalloc_memory_pool, mimalloc_memory_pool,
|
| 254 |
+
logging_memory_pool, proxy_memory_pool,
|
| 255 |
+
log_memory_allocations, jemalloc_set_decay_ms,
|
| 256 |
+
supported_memory_backends)
|
| 257 |
+
|
| 258 |
+
# I/O
|
| 259 |
+
from pyarrow.lib import (NativeFile, PythonFile,
|
| 260 |
+
BufferedInputStream, BufferedOutputStream, CacheOptions,
|
| 261 |
+
CompressedInputStream, CompressedOutputStream,
|
| 262 |
+
TransformInputStream, transcoding_input_stream,
|
| 263 |
+
FixedSizeBufferWriter,
|
| 264 |
+
BufferReader, BufferOutputStream,
|
| 265 |
+
OSFile, MemoryMappedFile, memory_map,
|
| 266 |
+
create_memory_map, MockOutputStream,
|
| 267 |
+
input_stream, output_stream,
|
| 268 |
+
have_libhdfs)
|
| 269 |
+
|
| 270 |
+
from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
|
| 271 |
+
concat_arrays, concat_tables, TableGroupBy,
|
| 272 |
+
RecordBatchReader, concat_batches)
|
| 273 |
+
|
| 274 |
+
# Exceptions
|
| 275 |
+
from pyarrow.lib import (ArrowCancelled,
|
| 276 |
+
ArrowCapacityError,
|
| 277 |
+
ArrowException,
|
| 278 |
+
ArrowKeyError,
|
| 279 |
+
ArrowIndexError,
|
| 280 |
+
ArrowInvalid,
|
| 281 |
+
ArrowIOError,
|
| 282 |
+
ArrowMemoryError,
|
| 283 |
+
ArrowNotImplementedError,
|
| 284 |
+
ArrowTypeError,
|
| 285 |
+
ArrowSerializationError)
|
| 286 |
+
|
| 287 |
+
from pyarrow.ipc import serialize_pandas, deserialize_pandas
|
| 288 |
+
import pyarrow.ipc as ipc
|
| 289 |
+
|
| 290 |
+
import pyarrow.types as types
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
# ----------------------------------------------------------------------
|
| 294 |
+
# Deprecations
|
| 295 |
+
|
| 296 |
+
from pyarrow.util import _deprecate_api, _deprecate_class
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
# TODO: Deprecate these somehow in the pyarrow namespace
|
| 300 |
+
from pyarrow.ipc import (Message, MessageReader, MetadataVersion,
|
| 301 |
+
RecordBatchFileReader, RecordBatchFileWriter,
|
| 302 |
+
RecordBatchStreamReader, RecordBatchStreamWriter)
|
| 303 |
+
|
| 304 |
+
# ----------------------------------------------------------------------
|
| 305 |
+
# Returning absolute path to the pyarrow include directory (if bundled, e.g. in
|
| 306 |
+
# wheels)
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def get_include():
    """
    Return the path of the directory holding the Arrow C++ include headers.

    The result is the ``include`` directory located next to this module,
    similar in spirit to ``numpy.get_include``.
    """
    package_dir = _os.path.dirname(__file__)
    include_dir = _os.path.join(package_dir, 'include')
    return include_dir
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
def _get_pkg_config_executable():
    """
    Return the pkg-config command to run.

    The value of the ``PKG_CONFIG`` environment variable is used when that
    variable is set; otherwise the plain name ``pkg-config`` is returned.
    """
    try:
        return _os.environ['PKG_CONFIG']
    except KeyError:
        return 'pkg-config'
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def _has_pkg_config(pkgname):
    """
    Return True when ``pkg-config --exists <pkgname>`` exits with status 0.

    Returns False for a non-zero exit status, and also when the pkg-config
    executable itself cannot be found.
    """
    import subprocess

    command = [_get_pkg_config_executable(), '--exists', pkgname]
    try:
        exit_status = subprocess.call(command)
    except FileNotFoundError:
        # No pkg-config binary available: report the package as missing.
        return False
    return exit_status == 0
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def _read_pkg_config_variable(pkgname, cli_args):
    """
    Run pkg-config for ``pkgname`` with extra arguments and return its output.

    Parameters
    ----------
    pkgname : str
        Package name handed to pkg-config.
    cli_args : list of str
        Additional command line arguments appended after the package name.

    Returns
    -------
    str
        Standard output of the command, with trailing whitespace removed
        and decoded as UTF-8.

    Raises
    ------
    RuntimeError
        If the command exits with a non-zero status; the message carries
        the decoded standard error output.
    """
    import subprocess

    command = [_get_pkg_config_executable(), pkgname] + cli_args
    completed = subprocess.run(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    if completed.returncode != 0:
        failure_text = completed.stderr.decode('utf8')
        raise RuntimeError("pkg-config failed: " + failure_text)
    return completed.stdout.rstrip().decode('utf8')
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
def get_libraries():
    """
    Return the library names to pass as the `libraries` argument when
    building C or Cython extensions that use pyarrow.

    A new list is created on every call.
    """
    library_names = ['arrow_python', 'arrow']
    return library_names
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
def create_library_symlinks():
    """
    With Linux and macOS wheels, the bundled shared libraries have an embedded
    ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them
    with -larrow won't work unless we create symlinks at locations like
    site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses
    prior problems we had with shipping two copies of the shared libraries to
    permit third party projects like turbodbc to build their C++ extensions
    against the pyarrow wheels.

    This function must only be invoked once and only when the shared libraries
    are bundled with the Python package, which should only apply to wheel-based
    installs. It requires write access to the site-packages/pyarrow directory
    and so depending on your system may need to be run with root.

    On Windows the function returns immediately without doing anything.
    Symlinks that already exist are left untouched. When a symlink cannot be
    created because of missing permissions, a hint naming the symlink path is
    printed and processing continues with the next library.
    """
    import glob
    if _sys.platform == 'win32':
        return
    package_cwd = _os.path.dirname(__file__)

    if _sys.platform == 'linux':
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))

        def get_symlink_path(hard_path):
            # Drop the last dot-separated component,
            # e.g. libarrow.so.17 -> libarrow.so
            return hard_path.rsplit('.', 1)[0]
    else:
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))

        def get_symlink_path(hard_path):
            # Drop the version component and re-append the extension,
            # e.g. libarrow.17.dylib -> libarrow.dylib
            return '.'.join((hard_path.rsplit('.', 2)[0], 'dylib'))

    for lib_hard_path in bundled_libs:
        symlink_path = get_symlink_path(lib_hard_path)
        if _os.path.exists(symlink_path):
            continue
        try:
            _os.symlink(lib_hard_path, symlink_path)
        except PermissionError:
            # The message carries a "{}" placeholder for the symlink path;
            # it has to be filled in, otherwise the literal braces are shown.
            print("Tried creating symlink {}. If you need to link to "
                  "bundled shared libraries, run "
                  "pyarrow.create_library_symlinks() as root"
                  .format(symlink_path))
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
def get_library_dirs():
    """
    Return lists of directories likely to contain Arrow C++ libraries for
    linking C or Cython extensions using pyarrow

    The directory of this package always comes first. Further candidates are
    added, without duplicates, from pkg-config (packages ``arrow`` and
    ``arrow_python``), from ``<python dir>/Library/lib`` on Windows when it
    contains ``arrow.lib``, and from ``$ARROW_HOME/lib`` when ``ARROW_HOME``
    is set.

    Returns
    -------
    list of str

    Raises
    ------
    ValueError
        If pkg-config returns a non-empty value for ``--libs-only-L`` that
        does not start with ``-L``.
    RuntimeError
        Propagated from ``_read_pkg_config_variable`` when the pkg-config
        call fails.
    """
    package_cwd = _os.path.dirname(__file__)
    library_dirs = [package_cwd]

    def append_library_dir(library_dir):
        # Keep insertion order while avoiding duplicate entries.
        if library_dir not in library_dirs:
            library_dirs.append(library_dir)

    # Search library paths via pkg-config. This is necessary if the user
    # installed libarrow and the other shared libraries manually and they
    # are not shipped inside the pyarrow package (see also ARROW-2976).
    # The executable lookup is done by the helpers themselves, so no local
    # copy of the pkg-config command is needed here.
    for pkgname in ["arrow", "arrow_python"]:
        if _has_pkg_config(pkgname):
            library_dir = _read_pkg_config_variable(pkgname,
                                                    ["--libs-only-L"])
            # pkg-config output could be empty if Arrow is installed
            # as a system package.
            if library_dir:
                if not library_dir.startswith("-L"):
                    raise ValueError(
                        "pkg-config --libs-only-L returned unexpected "
                        "value {!r}".format(library_dir))
                # NOTE(review): only the leading "-L" is stripped; if
                # pkg-config ever reports several -L flags the rest stays
                # in this single entry -- confirm whether that can occur.
                append_library_dir(library_dir[2:])

    if _sys.platform == 'win32':
        # TODO(wesm): Is this necessary, or does setuptools within a conda
        # installation add Library\lib to the linker path for MSVC?
        python_base_install = _os.path.dirname(_sys.executable)
        library_dir = _os.path.join(python_base_install, 'Library', 'lib')

        if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
            append_library_dir(library_dir)

    # ARROW-4074: Allow for ARROW_HOME to be set to some other directory
    if _os.environ.get('ARROW_HOME'):
        append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
    else:
        # Python wheels bundle the Arrow libraries in the pyarrow directory.
        append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))

    return library_dirs
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_azurefs.pyx
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: language_level = 3
|
| 19 |
+
|
| 20 |
+
from cython cimport binding
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
from pyarrow.lib import frombytes, tobytes
|
| 24 |
+
from pyarrow.includes.libarrow_fs cimport *
|
| 25 |
+
from pyarrow._fs cimport FileSystem
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
cdef class AzureFileSystem(FileSystem):
    """
    Azure Blob Storage backed FileSystem implementation

    This implementation supports flat namespace and hierarchical namespace (HNS) a.k.a.
    Data Lake Gen2 storage accounts. HNS will be automatically detected and HNS specific
    features will be used when they provide a performance advantage. Azurite emulator is
    also supported. Note: `/` is the only supported delimiter.

    The storage account is considered the root of the filesystem. When enabled, containers
    will be created or deleted during relevant directory operations. Obviously, this also
    requires authentication with the additional permissions.

    By default `DefaultAzureCredential <https://github.com/Azure/azure-sdk-for-cpp/blob/main/sdk/identity/azure-identity/README.md#defaultazurecredential>`__
    is used for authentication. This means it will try several types of authentication
    and go with the first one that works. If any authentication parameters are provided when
    initialising the FileSystem, they will be used instead of the default credential.

    Parameters
    ----------
    account_name : str
        Azure Blob Storage account name. This is the globally unique identifier for the
        storage account.
    account_key : str, default None
        Account key of the storage account. Pass None to use default credential.
    blob_storage_authority : str, default None
        hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful
        for connecting to a local emulator, like Azurite.
    dfs_storage_authority : str, default None
        hostname[:port] of the Data Lake Gen 2 Service. Defaults to
        `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite.
    blob_storage_scheme : str, default None
        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
        emulator, like Azurite.
    dfs_storage_scheme : str, default None
        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
        emulator, like Azurite.

    Examples
    --------
    >>> from pyarrow import fs
    >>> azure_fs = fs.AzureFileSystem(account_name='myaccount')
    >>> azurite_fs = fs.AzureFileSystem(
    ...     account_name='devstoreaccount1',
    ...     account_key='Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
    ...     blob_storage_authority='127.0.0.1:10000',
    ...     dfs_storage_authority='127.0.0.1:10000',
    ...     blob_storage_scheme='http',
    ...     dfs_storage_scheme='http',
    ... )

    For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
    """
    cdef:
        CAzureFileSystem* azurefs
        # Copy of the account key given to __init__, kept so that
        # __reduce__ can hand it back to the constructor.
        c_string account_key

    def __init__(self, account_name, *, account_key=None, blob_storage_authority=None,
                 dfs_storage_authority=None, blob_storage_scheme=None,
                 dfs_storage_scheme=None):
        cdef:
            CAzureOptions options
            shared_ptr[CAzureFileSystem] wrapped

        options.account_name = tobytes(account_name)

        # Optional endpoint settings are only written when a truthy value
        # was supplied; otherwise the option keeps its existing value.
        if blob_storage_scheme:
            options.blob_storage_scheme = tobytes(blob_storage_scheme)
        if dfs_storage_scheme:
            options.dfs_storage_scheme = tobytes(dfs_storage_scheme)
        if blob_storage_authority:
            options.blob_storage_authority = tobytes(blob_storage_authority)
        if dfs_storage_authority:
            options.dfs_storage_authority = tobytes(dfs_storage_authority)

        if not account_key:
            options.ConfigureDefaultCredential()
        else:
            key_bytes = tobytes(account_key)
            options.ConfigureAccountKeyCredential(key_bytes)
            self.account_key = key_bytes

        with nogil:
            wrapped = GetResultValue(CAzureFileSystem.Make(options))

        self.init(<shared_ptr[CFileSystem]> wrapped)

    cdef init(self, const shared_ptr[CFileSystem]& wrapped):
        # Let the base class store the handle, then keep a typed pointer
        # to the same object for Azure-specific calls.
        FileSystem.init(self, wrapped)
        self.azurefs = <CAzureFileSystem*> wrapped.get()

    @staticmethod
    @binding(True)  # Required for cython < 3
    def _reconstruct(kwargs):
        # __reduce__ doesn't allow passing named arguments directly to the
        # reconstructor, hence this wrapper.
        return AzureFileSystem(**kwargs)

    def __reduce__(self):
        cdef CAzureOptions opts = self.azurefs.options()
        # Keyword arguments that rebuild an equivalent filesystem through
        # _reconstruct.
        ctor_kwargs = {
            'account_name': frombytes(opts.account_name),
            'account_key': frombytes(self.account_key),
            'blob_storage_authority': frombytes(opts.blob_storage_authority),
            'dfs_storage_authority': frombytes(opts.dfs_storage_authority),
            'blob_storage_scheme': frombytes(opts.blob_storage_scheme),
            'dfs_storage_scheme': frombytes(opts.dfs_storage_scheme),
        }
        return (AzureFileSystem._reconstruct, (ctor_kwargs,))
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute.pyx
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute_docstrings.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
"""
|
| 19 |
+
Custom documentation additions for compute functions.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
# Extra documentation text, keyed by name.
# NOTE(review): the keys look like compute function names ("filter",
# "mode") -- confirm against the code that consumes this mapping.
function_doc_additions = {
    "filter": """
Examples
--------
>>> import pyarrow as pa
>>> arr = pa.array(["a", "b", "c", None, "e"])
>>> mask = pa.array([True, False, None, False, True])
>>> arr.filter(mask)
<pyarrow.lib.StringArray object at ...>
[
"a",
"e"
]
>>> arr.filter(mask, null_selection_behavior='emit_null')
<pyarrow.lib.StringArray object at ...>
[
"a",
null,
"e"
]
""",
    "mode": """
Examples
--------
>>> import pyarrow as pa
>>> import pyarrow.compute as pc
>>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2])
>>> modes = pc.mode(arr, 2)
>>> modes[0]
<pyarrow.StructScalar: [('mode', 2), ('count', 5)]>
>>> modes[1]
<pyarrow.StructScalar: [('mode', 1), ('count', 2)]>
""",
}
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_cuda.pyx
ADDED
|
@@ -0,0 +1,1080 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
from pyarrow.lib cimport *
|
| 20 |
+
from pyarrow.includes.libarrow_cuda cimport *
|
| 21 |
+
from pyarrow.lib import allocate_buffer, as_buffer, ArrowTypeError
|
| 22 |
+
from pyarrow.util import get_contiguous_span
|
| 23 |
+
cimport cpython as cp
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
cdef class Context(_Weakrefable):
    """
    CUDA driver context.
    """

    def __init__(self, *args, **kwargs):
        """
        Create a CUDA driver context for a particular device.

        If a CUDA context handle is passed, it is wrapped, otherwise
        a default CUDA context for the given device is requested.

        Parameters
        ----------
        device_number : int (default 0)
            Specify the GPU device for which the CUDA driver context is
            requested.
        handle : int, optional
            Specify CUDA handle for a shared context that has been created
            by another library.

        Raises
        ------
        ValueError
            If `device_number` is negative or not less than the number of
            devices reported by the device manager.
        """
        # This method exposed because autodoc doesn't pick __cinit__

    def __cinit__(self, int device_number=0, uintptr_t handle=0):
        cdef CCudaDeviceManager* manager
        manager = GetResultValue(CCudaDeviceManager.Instance())
        cdef int n = manager.num_devices()
        if device_number >= n or device_number < 0:
            # Leave the instance without a context before reporting the
            # invalid device number.
            self.context.reset()
            raise ValueError('device_number argument must be '
                             'non-negative less than %s' % (n))
        if handle == 0:
            # No foreign handle given: request the context for the device.
            self.context = GetResultValue(manager.GetContext(device_number))
        else:
            # Wrap a context handle created by another library.
            self.context = GetResultValue(manager.GetSharedContext(
                device_number, <void*>handle))
        self.device_number = device_number

    @staticmethod
    def from_numba(context=None):
        """
        Create a Context instance from a Numba CUDA context.

        Parameters
        ----------
        context : {numba.cuda.cudadrv.driver.Context, None}
            A Numba CUDA context instance.
            If None, the current Numba context is used.

        Returns
        -------
        shared_context : pyarrow.cuda.Context
            Context instance.
        """
        if context is None:
            import numba.cuda
            context = numba.cuda.current_context()
        return Context(device_number=context.device.id,
                       handle=context.handle.value)

    def to_numba(self):
        """
        Convert Context to a Numba CUDA context.

        Returns
        -------
        context : numba.cuda.cudadrv.driver.Context
            Numba CUDA context instance.
        """
        import ctypes
        import numba.cuda
        device = numba.cuda.gpus[self.device_number]
        handle = ctypes.c_void_p(self.handle)
        context = numba.cuda.cudadrv.driver.Context(device, handle)

        class DummyPendingDeallocs(object):
            # Context is managed by pyarrow
            def add_item(self, *args, **kwargs):
                pass

        # Replace the deallocation queue with a no-op object because the
        # context is managed by pyarrow.
        context.deallocations = DummyPendingDeallocs()
        return context

    @staticmethod
    def get_num_devices():
        """ Return the number of GPU devices.
        """
        cdef CCudaDeviceManager* manager
        manager = GetResultValue(CCudaDeviceManager.Instance())
        return manager.num_devices()

    @property
    def device_number(self):
        """ Return context device number.
        """
        return self.device_number

    @property
    def handle(self):
        """ Return pointer to context handle.
        """
        return <uintptr_t>self.context.get().handle()

    cdef void init(self, const shared_ptr[CCudaContext]& ctx):
        # Attach an already existing C++ context to this wrapper.
        self.context = ctx

    def synchronize(self):
        """Blocks until the device has completed all preceding requested
        tasks.
        """
        check_status(self.context.get().Synchronize())

    @property
    def bytes_allocated(self):
        """Return the number of allocated bytes.
        """
        return self.context.get().bytes_allocated()

    def get_device_address(self, uintptr_t address):
        """Return the device address that is reachable from kernels running in
        the context

        Parameters
        ----------
        address : int
            Specify memory address value

        Returns
        -------
        device_address : int
            Device address accessible from device context

        Notes
        -----
        The device address is defined as a memory address accessible
        by device. While it is often a device memory address, it can
        also be a host memory address, for instance, when the
        memory is allocated as host memory (using cudaMallocHost or
        cudaHostAlloc) or as managed memory (using cudaMallocManaged)
        or the host memory is page-locked (using cudaHostRegister).
        """
        return GetResultValue(self.context.get().GetDeviceAddress(address))

    def new_buffer(self, int64_t nbytes):
        """Return new device buffer.

        Parameters
        ----------
        nbytes : int
            Specify the number of bytes to be allocated.

        Returns
        -------
        buf : CudaBuffer
            Allocated buffer.
        """
        cdef:
            shared_ptr[CCudaBuffer] cudabuf
        with nogil:
            cudabuf = GetResultValue(self.context.get().Allocate(nbytes))
        return pyarrow_wrap_cudabuffer(cudabuf)

    @property
    def memory_manager(self):
        """
        The default memory manager tied to this context's device.

        Returns
        -------
        MemoryManager
        """
        return MemoryManager.wrap(self.context.get().memory_manager())

    @property
    def device(self):
        """
        The device instance associated with this context.

        Returns
        -------
        Device
        """
        return Device.wrap(self.context.get().device())

    def foreign_buffer(self, address, size, base=None):
        """
        Create device buffer from address and size as a view.

        The caller is responsible for allocating and freeing the
        memory. When `address==size==0` then a new zero-sized buffer
        is returned.

        Parameters
        ----------
        address : int
            Specify the starting address of the buffer. The address can
            refer to both device or host memory but it must be
            accessible from device after mapping it with
            `get_device_address` method.
        size : int
            Specify the size of device buffer in bytes.
        base : {None, object}
            Specify object that owns the referenced memory.

        Returns
        -------
        cbuf : CudaBuffer
            Device buffer as a view of device reachable memory.

        """
        if not address and size == 0:
            return self.new_buffer(0)
        cdef:
            uintptr_t c_addr = self.get_device_address(address)
            int64_t c_size = size
            shared_ptr[CCudaBuffer] cudabuf

        cudabuf = GetResultValue(self.context.get().View(
            <uint8_t*>c_addr, c_size))
        # `base` is handed to the wrapper together with the view.
        return pyarrow_wrap_cudabuffer_base(cudabuf, base)

    def open_ipc_buffer(self, ipc_handle):
        """ Open existing CUDA IPC memory handle

        Parameters
        ----------
        ipc_handle : IpcMemHandle
            Specify opaque pointer to CUipcMemHandle (driver API).

        Returns
        -------
        buf : CudaBuffer
            referencing device buffer
        """
        handle = pyarrow_unwrap_cudaipcmemhandle(ipc_handle)
        cdef shared_ptr[CCudaBuffer] cudabuf
        with nogil:
            cudabuf = GetResultValue(
                self.context.get().OpenIpcBuffer(handle.get()[0]))
        return pyarrow_wrap_cudabuffer(cudabuf)

    def buffer_from_data(self, object data, int64_t offset=0, int64_t size=-1):
        """Create device buffer and initialize with data.

        Parameters
        ----------
        data : {CudaBuffer, HostBuffer, Buffer, array-like}
            Specify data to be copied to device buffer.
        offset : int
            Specify the offset of input buffer for device data
            buffering. Default: 0.
        size : int
            Specify the size of device buffer in bytes. Default: all
            (starting from input offset)

        Returns
        -------
        cbuf : CudaBuffer
            Device buffer with copied data.

        Raises
        ------
        ValueError
            If `offset` is out of range or `offset + size` exceeds the
            size of the input.
        """
        # Anything that is not a CudaBuffer is treated as host data and
        # converted with as_buffer.
        is_host_data = not pyarrow_is_cudabuffer(data)
        buf = as_buffer(data) if is_host_data else data

        bsize = buf.size
        if offset < 0 or (bsize and offset >= bsize):
            raise ValueError('offset argument is out-of-range')
        if size < 0:
            size = bsize - offset
        elif offset + size > bsize:
            raise ValueError(
                'requested larger slice than available in device buffer')

        if offset != 0 or size != bsize:
            buf = buf.slice(offset, size)

        result = self.new_buffer(size)
        if is_host_data:
            result.copy_from_host(buf, position=0, nbytes=size)
        else:
            result.copy_from_device(buf, position=0, nbytes=size)
        return result

    def buffer_from_object(self, obj):
        """Create device buffer view of arbitrary object that references
        device accessible memory.

        When the object contains a non-contiguous view of device
        accessible memory then the returned device buffer will contain
        contiguous view of the memory, that is, including the
        intermediate data that is otherwise invisible to the input
        object.

        Parameters
        ----------
        obj : {object, Buffer, HostBuffer, CudaBuffer, ...}
            Specify an object that holds (device or host) address that
            can be accessed from device. This includes objects with
            types defined in pyarrow.cuda as well as arbitrary objects
            that implement the CUDA array interface as defined by numba.

        Returns
        -------
        cbuf : CudaBuffer
            Device buffer as a view of device accessible memory.

        Raises
        ------
        ArrowTypeError
            If `obj` is none of the supported kinds.
        """
        # HostBuffer and CudaBuffer are both subclasses of Buffer, so they
        # must be tested before the generic Buffer case; otherwise their
        # branches can never be reached.
        if isinstance(obj, HostBuffer):
            return self.foreign_buffer(obj.address, obj.size, base=obj)
        elif isinstance(obj, CudaBuffer):
            return obj
        elif isinstance(obj, Buffer):
            return CudaBuffer.from_buffer(obj)
        elif hasattr(obj, '__cuda_array_interface__'):
            desc = obj.__cuda_array_interface__
            addr = desc['data'][0]
            if addr is None:
                return self.new_buffer(0)
            import numpy as np
            # Byte span that covers every element described by the
            # shape/strides of the array interface.
            start, end = get_contiguous_span(
                desc['shape'], desc.get('strides'),
                np.dtype(desc['typestr']).itemsize)
            return self.foreign_buffer(addr + start, end - start, base=obj)
        raise ArrowTypeError('cannot create device buffer view from'
                             ' `%s` object' % (type(obj)))
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
cdef class IpcMemHandle(_Weakrefable):
    """Serializable wrapper around a CUDA IPC memory handle.
    """
    cdef void init(self, shared_ptr[CCudaIpcMemHandle]& h):
        # Store the shared pointer to the C++ handle on this wrapper.
        self.handle = h

    @staticmethod
    def from_buffer(Buffer opaque_handle):
        """Build an IpcMemHandle from an opaque buffer (for example one
        received from another process).

        Parameters
        ----------
        opaque_handle :
            a CUipcMemHandle as a const void*

        Returns
        -------
        ipc_handle : IpcMemHandle
        """
        cdef:
            shared_ptr[CBuffer] raw = pyarrow_unwrap_buffer(opaque_handle)
            shared_ptr[CCudaIpcMemHandle] ipc

        # The handle is rebuilt from the buffer's data pointer.
        ipc = GetResultValue(
            CCudaIpcMemHandle.FromBuffer(raw.get().data()))
        return pyarrow_wrap_cudaipcmemhandle(ipc)

    def serialize(self, pool=None):
        """Serialize this IpcMemHandle into a Buffer.

        Parameters
        ----------
        pool : {MemoryPool, None}
            Specify a pool to allocate memory from

        Returns
        -------
        buf : Buffer
            The serialized buffer.
        """
        cdef:
            CMemoryPool* c_pool = maybe_unbox_memory_pool(pool)
            shared_ptr[CBuffer] serialized
            CCudaIpcMemHandle* raw_handle = self.handle.get()
        with nogil:
            serialized = GetResultValue(raw_handle.Serialize(c_pool))
        return pyarrow_wrap_buffer(serialized)
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
cdef class CudaBuffer(Buffer):
    """An Arrow buffer with data located in a GPU device.

    To create a CudaBuffer instance, use Context.new_buffer().

    The memory allocated in a CudaBuffer is freed when the buffer object
    is deleted.
    """

    def __init__(self):
        # The message names `new_buffer`, the allocation method that
        # Context actually provides.
        raise TypeError("Do not call CudaBuffer's constructor directly, use "
                        "`<pyarrow.Context instance>.new_buffer`"
                        " method instead.")

    cdef void init_cuda(self,
                        const shared_ptr[CCudaBuffer]& buffer,
                        object base):
        # Keep the CUDA-typed pointer, initialise the generic Buffer part
        # with the same buffer, and store `base` on the wrapper.
        self.cuda_buffer = buffer
        self.init(<shared_ptr[CBuffer]> buffer)
        self.base = base

    @staticmethod
    def from_buffer(buf):
        """ Convert back generic buffer into CudaBuffer

        Parameters
        ----------
        buf : Buffer
            Specify buffer containing CudaBuffer

        Returns
        -------
        dbuf : CudaBuffer
            Resulting device buffer.
        """
        c_buf = pyarrow_unwrap_buffer(buf)
        cuda_buffer = GetResultValue(CCudaBuffer.FromBuffer(c_buf))
        return pyarrow_wrap_cudabuffer(cuda_buffer)

    @staticmethod
    def from_numba(mem):
        """Create a CudaBuffer view from numba MemoryPointer instance.

        Parameters
        ----------
        mem : numba.cuda.cudadrv.driver.MemoryPointer

        Returns
        -------
        cbuf : CudaBuffer
            Device buffer as a view of numba MemoryPointer.
        """
        ctx = Context.from_numba(mem.context)
        if mem.device_pointer.value is None and mem.size==0:
            return ctx.new_buffer(0)
        return ctx.foreign_buffer(mem.device_pointer.value, mem.size, base=mem)

    def to_numba(self):
        """Return numba memory pointer of CudaBuffer instance.
        """
        import ctypes
        from numba.cuda.cudadrv.driver import MemoryPointer
        return MemoryPointer(self.context.to_numba(),
                             pointer=ctypes.c_void_p(self.address),
                             size=self.size)

    cdef getitem(self, int64_t i):
        # Copy the single byte at position i to the host and return it.
        return self.copy_to_host(position=i, nbytes=1)[0]

    def copy_to_host(self, int64_t position=0, int64_t nbytes=-1,
                     Buffer buf=None,
                     MemoryPool memory_pool=None, c_bool resizable=False):
        """Copy memory from GPU device to CPU host

        Caller is responsible for ensuring that all tasks affecting
        the memory are finished. Use

          `<CudaBuffer instance>.context.synchronize()`

        when needed.

        Parameters
        ----------
        position : int
            Specify the starting position of the source data in GPU
            device buffer. Default: 0.
        nbytes : int
            Specify the number of bytes to copy. Default: -1 (all from
            the position until host buffer is full).
        buf : Buffer
            Specify a pre-allocated output buffer in host. Default: None
            (allocate new output buffer).
        memory_pool : MemoryPool
        resizable : bool
            Specify extra arguments to allocate_buffer. Used only when
            buf is None.

        Returns
        -------
        buf : Buffer
            Output buffer in host.

        Raises
        ------
        ValueError
            If `position` is out of range, if `nbytes` exceeds what is
            available in the device buffer from `position`, or if
            `nbytes` does not fit into the given host buffer.
        """
        if position < 0 or (self.size and position > self.size) \
           or (self.size == 0 and position != 0):
            raise ValueError('position argument is out-of-range')
        cdef:
            int64_t c_nbytes
        if buf is None:
            if nbytes < 0:
                # copy all starting from position to new host buffer
                c_nbytes = self.size - position
            else:
                if nbytes > self.size - position:
                    raise ValueError(
                        'requested more to copy than available from '
                        'device buffer')
                # copy nbytes starting from position to new host buffer
                c_nbytes = nbytes
            buf = allocate_buffer(c_nbytes, memory_pool=memory_pool,
                                  resizable=resizable)
        else:
            if nbytes < 0:
                # copy all from position until given host buffer is full
                c_nbytes = min(self.size - position, buf.size)
            else:
                if nbytes > buf.size:
                    raise ValueError(
                        'requested copy does not fit into host buffer')
                # The request must also stay inside the device buffer,
                # exactly as in the branch that allocates the host buffer.
                if nbytes > self.size - position:
                    raise ValueError(
                        'requested more to copy than available from '
                        'device buffer')
                # copy nbytes from position to given host buffer
                c_nbytes = nbytes

        cdef:
            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
            int64_t c_position = position
        with nogil:
            check_status(self.cuda_buffer.get()
                         .CopyToHost(c_position, c_nbytes,
                                     c_buf.get().mutable_data()))
        return buf

    def copy_from_host(self, data, int64_t position=0, int64_t nbytes=-1):
        """Copy data from host to device.

        The device buffer must be pre-allocated.

        Parameters
        ----------
        data : {Buffer, array-like}
            Specify data in host. It can be array-like that is valid
            argument to py_buffer
        position : int
            Specify the starting position of the copy in device buffer.
            Default: 0.
        nbytes : int
            Specify the number of bytes to copy. Default: -1 (all from
            source until device buffer, starting from position, is full)

        Returns
        -------
        nbytes : int
            Number of bytes copied.

        Raises
        ------
        ValueError
            If `position` is out of range or `nbytes` exceeds either the
            host data or the space left in the device buffer.
        """
        if position < 0 or position > self.size:
            raise ValueError('position argument is out-of-range')
        cdef:
            int64_t c_nbytes
        buf = as_buffer(data)

        if nbytes < 0:
            # copy from host buffer to device buffer starting from
            # position until device buffer is full
            c_nbytes = min(self.size - position, buf.size)
        else:
            if nbytes > buf.size:
                raise ValueError(
                    'requested more to copy than available from host buffer')
            if nbytes > self.size - position:
                raise ValueError(
                    'requested more to copy than available in device buffer')
            # copy nbytes from host buffer to device buffer starting
            # from position
            c_nbytes = nbytes

        cdef:
            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
            int64_t c_position = position
        with nogil:
            check_status(self.cuda_buffer.get().
                         CopyFromHost(c_position, c_buf.get().data(),
                                      c_nbytes))
        return c_nbytes

    def copy_from_device(self, buf, int64_t position=0, int64_t nbytes=-1):
        """Copy data from device to device.

        Parameters
        ----------
        buf : CudaBuffer
            Specify source device buffer.
        position : int
            Specify the starting position of the copy in device buffer.
            Default: 0.
        nbytes : int
            Specify the number of bytes to copy. Default: -1 (all from
            source until device buffer, starting from position, is full)

        Returns
        -------
        nbytes : int
            Number of bytes copied.

        Raises
        ------
        ValueError
            If `position` is out of range or `nbytes` exceeds either the
            source buffer or the space left in this device buffer.
        """
        if position < 0 or position > self.size:
            raise ValueError('position argument is out-of-range')
        cdef:
            int64_t c_nbytes

        if nbytes < 0:
            # copy from source device buffer to device buffer starting
            # from position until device buffer is full
            c_nbytes = min(self.size - position, buf.size)
        else:
            if nbytes > buf.size:
                raise ValueError(
                    'requested more to copy than available from device buffer')
            if nbytes > self.size - position:
                raise ValueError(
                    'requested more to copy than available in device buffer')
            # copy nbytes from source device buffer to device buffer
            # starting from position
            c_nbytes = nbytes

        cdef:
            shared_ptr[CCudaBuffer] c_buf = pyarrow_unwrap_cudabuffer(buf)
            int64_t c_position = position
            shared_ptr[CCudaContext] c_src_ctx = pyarrow_unwrap_cudacontext(
                buf.context)
            void* c_source_data = <void*>(c_buf.get().address())

        # Buffers whose context handles differ take the cross-context
        # copy path; otherwise a same-context copy is used.
        if self.context.handle != buf.context.handle:
            with nogil:
                check_status(self.cuda_buffer.get().
                             CopyFromAnotherDevice(c_src_ctx, c_position,
                                                   c_source_data, c_nbytes))
        else:
            with nogil:
                check_status(self.cuda_buffer.get().
                             CopyFromDevice(c_position, c_source_data,
                                            c_nbytes))
        return c_nbytes

    def export_for_ipc(self):
        """
        Expose this device buffer as IPC memory which can be used in other
        processes.

        After calling this function, this device memory will not be
        freed when the CudaBuffer is destructed.

        Returns
        -------
        ipc_handle : IpcMemHandle
            The exported IPC handle

        """
        cdef shared_ptr[CCudaIpcMemHandle] handle
        with nogil:
            handle = GetResultValue(self.cuda_buffer.get().ExportForIpc())
        return pyarrow_wrap_cudaipcmemhandle(handle)

    @property
    def context(self):
        """Returns the CUDA driver context of this buffer.
        """
        return pyarrow_wrap_cudacontext(self.cuda_buffer.get().context())

    def slice(self, offset=0, length=None):
        """Return slice of device buffer

        Parameters
        ----------
        offset : int, default 0
            Specify offset from the start of device buffer to slice
        length : int, default None
            Specify the length of slice (default is until end of device
            buffer starting from offset). If the length is larger than
            the data available, the returned slice will have a size of
            the available data starting from the offset.

        Returns
        -------
        sliced : CudaBuffer
            Zero-copy slice of device buffer.

        Raises
        ------
        ValueError
            If `offset` is negative or, for a non-empty buffer, not
            smaller than the buffer size.
        """
        if offset < 0 or (self.size and offset >= self.size):
            raise ValueError('offset argument is out-of-range')
        cdef int64_t offset_ = offset
        cdef int64_t size
        if length is None:
            size = self.size - offset_
        elif offset + length <= self.size:
            size = length
        else:
            # Requested length runs past the end: clamp to what is left.
            size = self.size - offset
        parent = pyarrow_unwrap_cudabuffer(self)
        return pyarrow_wrap_cudabuffer(make_shared[CCudaBuffer](parent,
                                                                offset_, size))

    def to_pybytes(self):
        """Return device buffer content as Python bytes.
        """
        return self.copy_to_host().to_pybytes()

    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
        # Device buffer contains data pointers on the device. Hence,
        # cannot support buffer protocol PEP-3118 for CudaBuffer.
        raise BufferError('buffer protocol for device buffer not supported')
|
| 720 |
+
|
| 721 |
+
|
| 722 |
+
cdef class HostBuffer(Buffer):
    """Device-accessible CPU memory created using cudaHostAlloc.

    Instances are not constructed directly; obtain one with

      cuda.new_host_buffer(<nbytes>)
    """

    def __init__(self):
        # Direct construction is rejected; the factory function is the
        # supported entry point.
        raise TypeError(
            "Do not call HostBuffer's constructor directly,"
            " use `cuda.new_host_buffer` function instead.")

    cdef void init_host(self, const shared_ptr[CCudaHostBuffer]& buffer):
        # Keep the host-buffer-typed pointer and initialise the generic
        # Buffer part with the same buffer.
        self.host_buffer = buffer
        self.init(<shared_ptr[CBuffer]> buffer)

    @property
    def size(self):
        """Size as reported by the wrapped host buffer."""
        return self.host_buffer.get().size()
|
| 741 |
+
|
| 742 |
+
|
| 743 |
+
cdef class BufferReader(NativeFile):
    """File interface for zero-copy read from CUDA buffers.

    Note: Read methods return pointers to device memory. This means
    you must be careful using this interface with any Arrow code which
    may expect to be able to do anything other than pointer arithmetic
    on the returned buffers.
    """

    def __cinit__(self, CudaBuffer obj):
        # Hold a reference to the source buffer on the reader object.
        self.buffer = obj
        self.reader = new CCudaBufferReader(self.buffer.buffer)
        # The raw reader pointer is handed to a shared_ptr installed as
        # this file's random access file.
        self.set_random_access_file(
            shared_ptr[CRandomAccessFile](self.reader))
        self.is_readable = True

    def read_buffer(self, nbytes=None):
        """Return a slice view of the underlying device buffer.

        The slice begins at the reader's current position and spans the
        requested number of bytes.

        Parameters
        ----------
        nbytes : int, default None
            Specify the number of bytes to read. Default: None (read all
            remaining bytes).

        Returns
        -------
        cbuf : CudaBuffer
            New device buffer.

        """
        cdef:
            int64_t n_to_read
            shared_ptr[CCudaBuffer] device_view

        if nbytes is not None:
            n_to_read = nbytes
        else:
            # Everything from the current position to the end.
            n_to_read = self.size() - self.tell()

        with nogil:
            device_view = static_pointer_cast[CCudaBuffer, CBuffer](
                GetResultValue(self.reader.Read(n_to_read)))

        return pyarrow_wrap_cudabuffer(device_view)
|
| 791 |
+
|
| 792 |
+
|
| 793 |
+
cdef class BufferWriter(NativeFile):
    """File interface for writing to CUDA buffers.

    Writes are unbuffered by default. Set the `buffer_size` property to
    enable buffering.
    """

    def __cinit__(self, CudaBuffer buffer):
        # Hold a reference to the target buffer on the writer object.
        self.buffer = buffer
        self.writer = new CCudaBufferWriter(self.buffer.cuda_buffer)
        # The raw writer pointer is handed to a shared_ptr installed as
        # this file's output stream.
        self.set_output_stream(shared_ptr[COutputStream](self.writer))
        self.is_writable = True

    def writeat(self, int64_t position, object data):
        """Write data to buffer starting from position.

        Parameters
        ----------
        position : int
            Specify device buffer position where the data will be
            written.
        data : array-like
            Specify data, the data instance must implement buffer
            protocol.
        """
        cdef:
            Buffer host_buf = as_buffer(data)
            const uint8_t* src = host_buf.buffer.get().data()
            int64_t src_size = host_buf.buffer.get().size()

        with nogil:
            check_status(self.writer.WriteAt(position, src, src_size))

    def flush(self):
        """ Flush the buffer stream """
        with nogil:
            check_status(self.writer.Flush())

    def seek(self, int64_t position, int whence=0):
        # TODO: remove this method after NativeFile.seek supports
        # writable files.
        cdef int64_t target

        # Only whence 0 (position used as-is) and whence 1 (position
        # added to the current Tell()) are accepted.
        if whence != 0 and whence != 1:
            raise ValueError("Invalid value of whence: {0}"
                             .format(whence))

        with nogil:
            target = position
            if whence == 1:
                target = GetResultValue(self.writer.Tell()) + position
            check_status(self.writer.Seek(target))
        return self.tell()

    @property
    def buffer_size(self):
        """Returns size of host (CPU) buffer, 0 for unbuffered
        """
        return self.writer.buffer_size()

    @buffer_size.setter
    def buffer_size(self, int64_t buffer_size):
        """Set CPU buffer size to limit calls to cudaMemcpy

        Parameters
        ----------
        buffer_size : int
            Specify the size of CPU buffer to allocate in bytes.
        """
        with nogil:
            check_status(self.writer.SetBufferSize(buffer_size))

    @property
    def num_bytes_buffered(self):
        """Returns number of bytes buffered on host
        """
        return self.writer.num_bytes_buffered()
|
| 872 |
+
|
| 873 |
+
# Functions
|
| 874 |
+
|
| 875 |
+
|
| 876 |
+
def new_host_buffer(const int64_t size, int device=0):
    """Allocate a CPU host buffer whose memory is CUDA-accessible.

    Parameters
    ----------
    size : int
        Specify the number of bytes to be allocated.
    device : int
        Specify GPU device number.

    Returns
    -------
    dbuf : HostBuffer
        Allocated host buffer
    """
    cdef shared_ptr[CCudaHostBuffer] host_buf
    with nogil:
        host_buf = GetResultValue(AllocateCudaHostBuffer(device, size))
    return pyarrow_wrap_cudahostbuffer(host_buf)
|
| 895 |
+
|
| 896 |
+
|
| 897 |
+
def serialize_record_batch(object batch, object ctx):
    """ Serialize a record batch message into GPU device memory

    Parameters
    ----------
    batch : RecordBatch
        Record batch to write
    ctx : Context
        CUDA Context to allocate device memory from

    Returns
    -------
    dbuf : CudaBuffer
        device buffer which contains the record batch message
    """
    cdef:
        shared_ptr[CCudaBuffer] device_buf
        CRecordBatch* c_batch = pyarrow_unwrap_batch(batch).get()
        CCudaContext* c_ctx = pyarrow_unwrap_cudacontext(ctx).get()
    with nogil:
        device_buf = GetResultValue(
            CudaSerializeRecordBatch(c_batch[0], c_ctx))
    return pyarrow_wrap_cudabuffer(device_buf)
|
| 918 |
+
|
| 919 |
+
|
| 920 |
+
def read_message(object source, pool=None):
    """ Read Arrow IPC message located on GPU device

    Parameters
    ----------
    source : {CudaBuffer, cuda.BufferReader}
        Device buffer or reader of device buffer.
    pool : MemoryPool (optional)
        Pool to allocate CPU memory for the metadata

    Returns
    -------
    message : Message
        The deserialized message, body still on device
    """
    cdef:
        Message result = Message.__new__(Message)
        BufferReader reader
        CMemoryPool* pool_ = maybe_unbox_memory_pool(pool)
    # `reader` must be bound on both paths: use a given BufferReader
    # directly and wrap anything else in a new one.
    if isinstance(source, BufferReader):
        reader = source
    else:
        reader = BufferReader(source)
    with nogil:
        result.message = move(
            GetResultValue(ReadMessage(reader.reader, pool_)))
    return result
|
| 944 |
+
|
| 945 |
+
|
| 946 |
+
def read_record_batch(object buffer, object schema, *,
                      DictionaryMemo dictionary_memo=None, pool=None):
    """Build a RecordBatch from an IPC message that lives on a CUDA device.

    Only the metadata is copied to host memory for deserialization;
    the record batch data itself stays on the device.

    Parameters
    ----------
    buffer : CudaBuffer
      Device buffer containing the complete IPC message
    schema : Schema
      The schema for the record batch
    dictionary_memo : DictionaryMemo, optional
      If message contains dictionaries, must pass a populated
      DictionaryMemo
    pool : MemoryPool (optional)
      Pool to allocate metadata from

    Returns
    -------
    batch : RecordBatch
      Reconstructed record batch, with device pointers

    """
    cdef:
        shared_ptr[CSchema] c_schema = pyarrow_unwrap_schema(schema)
        shared_ptr[CCudaBuffer] c_buffer = pyarrow_unwrap_cudabuffer(buffer)
        CDictionaryMemo scratch_memo
        CDictionaryMemo* c_memo
        CMemoryPool* c_pool = maybe_unbox_memory_pool(pool)
        shared_ptr[CRecordBatch] c_batch

    # Without a caller-supplied memo, use a throwaway one local to this call.
    if dictionary_memo is None:
        c_memo = &scratch_memo
    else:
        c_memo = dictionary_memo.memo

    with nogil:
        c_batch = GetResultValue(CudaReadRecordBatch(
            c_schema, c_memo, c_buffer, c_pool))
    return pyarrow_wrap_batch(c_batch)
|
| 988 |
+
|
| 989 |
+
|
| 990 |
+
# Public API
|
| 991 |
+
|
| 992 |
+
|
| 993 |
+
cdef public api bint pyarrow_is_buffer(object buffer):
    # True when `buffer` is a Buffer instance (subclasses included).
    return isinstance(buffer, Buffer)
|
| 995 |
+
|
| 996 |
+
# cudabuffer
|
| 997 |
+
|
| 998 |
+
cdef public api bint pyarrow_is_cudabuffer(object buffer):
    # True when `buffer` is a CudaBuffer instance (subclasses included).
    return isinstance(buffer, CudaBuffer)
|
| 1000 |
+
|
| 1001 |
+
|
| 1002 |
+
cdef public api object \
        pyarrow_wrap_cudabuffer_base(const shared_ptr[CCudaBuffer]& buf, base):
    # Allocate a CudaBuffer without running __init__, then attach `buf`
    # and pass `base` through to init_cuda.
    cdef CudaBuffer wrapper
    wrapper = CudaBuffer.__new__(CudaBuffer)
    wrapper.init_cuda(buf, base)
    return wrapper
|
| 1007 |
+
|
| 1008 |
+
|
| 1009 |
+
cdef public api object \
        pyarrow_wrap_cudabuffer(const shared_ptr[CCudaBuffer]& buf):
    # Same as pyarrow_wrap_cudabuffer_base, with None passed as the base.
    cdef CudaBuffer wrapper
    wrapper = CudaBuffer.__new__(CudaBuffer)
    wrapper.init_cuda(buf, None)
    return wrapper
|
| 1014 |
+
|
| 1015 |
+
|
| 1016 |
+
cdef public api shared_ptr[CCudaBuffer] pyarrow_unwrap_cudabuffer(object obj):
    # Return the shared_ptr stored on a CudaBuffer; any other type is
    # rejected with TypeError.
    if not pyarrow_is_cudabuffer(obj):
        raise TypeError('expected CudaBuffer instance, got %s'
                        % (type(obj).__name__))
    return (<CudaBuffer>obj).cuda_buffer
|
| 1021 |
+
|
| 1022 |
+
# cudahostbuffer
|
| 1023 |
+
|
| 1024 |
+
cdef public api bint pyarrow_is_cudahostbuffer(object buffer):
    # True when `buffer` is a HostBuffer instance (subclasses included).
    return isinstance(buffer, HostBuffer)
|
| 1026 |
+
|
| 1027 |
+
|
| 1028 |
+
cdef public api object \
        pyarrow_wrap_cudahostbuffer(const shared_ptr[CCudaHostBuffer]& buf):
    # Allocate a HostBuffer without running __init__ and attach `buf`.
    cdef HostBuffer wrapper
    wrapper = HostBuffer.__new__(HostBuffer)
    wrapper.init_host(buf)
    return wrapper
|
| 1033 |
+
|
| 1034 |
+
|
| 1035 |
+
cdef public api shared_ptr[CCudaHostBuffer] \
        pyarrow_unwrap_cudahostbuffer(object obj):
    # Return the shared_ptr stored on a HostBuffer; any other type is
    # rejected with TypeError.
    if not pyarrow_is_cudahostbuffer(obj):
        raise TypeError('expected HostBuffer instance, got %s'
                        % (type(obj).__name__))
    return (<HostBuffer>obj).host_buffer
|
| 1041 |
+
|
| 1042 |
+
# cudacontext
|
| 1043 |
+
|
| 1044 |
+
cdef public api bint pyarrow_is_cudacontext(object ctx):
    # True when `ctx` is a Context instance (subclasses included).
    return isinstance(ctx, Context)
|
| 1046 |
+
|
| 1047 |
+
|
| 1048 |
+
cdef public api object \
        pyarrow_wrap_cudacontext(const shared_ptr[CCudaContext]& ctx):
    # Allocate a Context without running __init__ and attach `ctx`.
    cdef Context wrapper
    wrapper = Context.__new__(Context)
    wrapper.init(ctx)
    return wrapper
|
| 1053 |
+
|
| 1054 |
+
|
| 1055 |
+
cdef public api shared_ptr[CCudaContext] \
        pyarrow_unwrap_cudacontext(object obj):
    # Return the shared_ptr stored on a Context; any other type is
    # rejected with TypeError.
    if not pyarrow_is_cudacontext(obj):
        raise TypeError('expected Context instance, got %s'
                        % (type(obj).__name__))
    return (<Context>obj).context
|
| 1061 |
+
|
| 1062 |
+
# cudaipcmemhandle
|
| 1063 |
+
|
| 1064 |
+
cdef public api bint pyarrow_is_cudaipcmemhandle(object handle):
    # True when `handle` is an IpcMemHandle instance (subclasses included).
    return isinstance(handle, IpcMemHandle)
|
| 1066 |
+
|
| 1067 |
+
|
| 1068 |
+
cdef public api object \
        pyarrow_wrap_cudaipcmemhandle(shared_ptr[CCudaIpcMemHandle]& h):
    # Allocate an IpcMemHandle without running __init__ and attach `h`.
    cdef IpcMemHandle wrapper
    wrapper = IpcMemHandle.__new__(IpcMemHandle)
    wrapper.init(h)
    return wrapper
|
| 1073 |
+
|
| 1074 |
+
|
| 1075 |
+
cdef public api shared_ptr[CCudaIpcMemHandle] \
        pyarrow_unwrap_cudaipcmemhandle(object obj):
    # Return the shared_ptr stored on an IpcMemHandle; any other type is
    # rejected with TypeError.
    if not pyarrow_is_cudaipcmemhandle(obj):
        raise TypeError('expected IpcMemHandle instance, got %s'
                        % (type(obj).__name__))
    return (<IpcMemHandle>obj).handle
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_dataset_parquet.pyx
ADDED
|
@@ -0,0 +1,1053 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: language_level = 3
|
| 19 |
+
|
| 20 |
+
"""Dataset support for Parquet file format."""
|
| 21 |
+
|
| 22 |
+
from cython cimport binding
|
| 23 |
+
from cython.operator cimport dereference as deref
|
| 24 |
+
|
| 25 |
+
import os
|
| 26 |
+
import warnings
|
| 27 |
+
|
| 28 |
+
import pyarrow as pa
|
| 29 |
+
from pyarrow.lib cimport *
|
| 30 |
+
from pyarrow.lib import frombytes, tobytes, is_threading_enabled
|
| 31 |
+
from pyarrow.includes.libarrow cimport *
|
| 32 |
+
from pyarrow.includes.libarrow_dataset cimport *
|
| 33 |
+
from pyarrow.includes.libarrow_dataset_parquet cimport *
|
| 34 |
+
from pyarrow._fs cimport FileSystem
|
| 35 |
+
|
| 36 |
+
from pyarrow._compute cimport Expression, _bind
|
| 37 |
+
from pyarrow._dataset cimport (
|
| 38 |
+
_make_file_source,
|
| 39 |
+
DatasetFactory,
|
| 40 |
+
FileFormat,
|
| 41 |
+
FileFragment,
|
| 42 |
+
FileWriteOptions,
|
| 43 |
+
Fragment,
|
| 44 |
+
FragmentScanOptions,
|
| 45 |
+
CacheOptions,
|
| 46 |
+
Partitioning,
|
| 47 |
+
PartitioningFactory,
|
| 48 |
+
WrittenFile
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
from pyarrow._parquet cimport (
|
| 52 |
+
_create_writer_properties, _create_arrow_writer_properties,
|
| 53 |
+
FileMetaData,
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
try:
|
| 58 |
+
from pyarrow._dataset_parquet_encryption import (
|
| 59 |
+
set_encryption_config, set_decryption_config, set_decryption_properties
|
| 60 |
+
)
|
| 61 |
+
parquet_encryption_enabled = True
|
| 62 |
+
except ImportError:
|
| 63 |
+
parquet_encryption_enabled = False
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
cdef Expression _true = Expression._scalar(True)
|
| 67 |
+
|
| 68 |
+
ctypedef CParquetFileWriter* _CParquetFileWriterPtr
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
cdef class ParquetFileFormat(FileFormat):
    """
    FileFormat for Parquet

    Parameters
    ----------
    read_options : ParquetReadOptions
        Read options for the file.
    default_fragment_scan_options : ParquetFragmentScanOptions
        Scan Options for the file.
    **kwargs : dict
        Additional options for read option or scan option
    """

    cdef:
        # Borrowed pointer into the shared_ptr handed to init().
        CParquetFileFormat* parquet_format

    def __init__(self, read_options=None,
                 default_fragment_scan_options=None,
                 **kwargs):
        cdef:
            shared_ptr[CParquetFileFormat] wrapped
            CParquetFileFormatReaderOptions* options

        # Split the loose keyword arguments: names listed in
        # _PARQUET_READ_OPTIONS are read options, everything else is
        # treated as a scan option.
        read_options_args = {option: kwargs[option] for option in kwargs
                             if option in _PARQUET_READ_OPTIONS}
        scan_args = {option: kwargs[option] for option in kwargs
                     if option not in _PARQUET_READ_OPTIONS}
        # An explicit options object and loose keywords of the same family
        # are mutually exclusive.
        if read_options and read_options_args:
            duplicates = ', '.join(sorted(read_options_args))
            raise ValueError(f'If `read_options` is given, '
                             f'cannot specify {duplicates}')
        if default_fragment_scan_options and scan_args:
            duplicates = ', '.join(sorted(scan_args))
            raise ValueError(f'If `default_fragment_scan_options` is given, '
                             f'cannot specify {duplicates}')

        if read_options is None:
            read_options = ParquetReadOptions(**read_options_args)
        elif isinstance(read_options, dict):
            # For backwards compatibility: a dict may mix read options
            # with scan options; the latter are moved over with a warning.
            duplicates = []
            for option, value in read_options.items():
                if option in _PARQUET_READ_OPTIONS:
                    read_options_args[option] = value
                else:
                    duplicates.append(option)
                    scan_args[option] = value
            if duplicates:
                duplicates = ", ".join(duplicates)
                warnings.warn(f'The scan options {duplicates} should be '
                              'specified directly as keyword arguments')
            read_options = ParquetReadOptions(**read_options_args)
        elif not isinstance(read_options, ParquetReadOptions):
            raise TypeError('`read_options` must be either a dictionary or an '
                            'instance of ParquetReadOptions')

        if default_fragment_scan_options is None:
            default_fragment_scan_options = ParquetFragmentScanOptions(**scan_args)
        elif isinstance(default_fragment_scan_options, dict):
            default_fragment_scan_options = ParquetFragmentScanOptions(
                **default_fragment_scan_options)
        elif not isinstance(default_fragment_scan_options,
                            ParquetFragmentScanOptions):
            raise TypeError('`default_fragment_scan_options` must be either a '
                            'dictionary or an instance of '
                            'ParquetFragmentScanOptions')

        wrapped = make_shared[CParquetFileFormat]()

        # Copy the Python-level read options onto the C++ reader options.
        options = &(wrapped.get().reader_options)
        if read_options.dictionary_columns is not None:
            for column in read_options.dictionary_columns:
                options.dict_columns.insert(tobytes(column))
        options.coerce_int96_timestamp_unit = \
            read_options._coerce_int96_timestamp_unit

        self.init(<shared_ptr[CFileFormat]> wrapped)
        self.default_fragment_scan_options = default_fragment_scan_options

    cdef void init(self, const shared_ptr[CFileFormat]& sp):
        # Initialise the base class, then cache a typed pointer to the
        # same object for Parquet-specific access.
        FileFormat.init(self, sp)
        self.parquet_format = <CParquetFileFormat*> sp.get()

    cdef WrittenFile _finish_write(self, path, base_dir,
                                   CFileWriter* file_writer):
        # Build the WrittenFile record for `path`: the bytes written and,
        # when the writer exposes any, the Parquet metadata with its file
        # path set relative to `base_dir`.
        cdef:
            FileMetaData parquet_metadata
            CParquetFileWriter* parquet_file_writer

        parquet_metadata = None
        parquet_file_writer = dynamic_cast[_CParquetFileWriterPtr](file_writer)
        with nogil:
            metadata = deref(
                deref(parquet_file_writer).parquet_writer()).metadata()
        if metadata:
            parquet_metadata = FileMetaData()
            parquet_metadata.init(metadata)
            parquet_metadata.set_file_path(os.path.relpath(path, base_dir))

        size = GetResultValue(file_writer.GetBytesWritten())

        return WrittenFile(path, parquet_metadata, size)

    @property
    def read_options(self):
        """
        A new ParquetReadOptions rebuilt from the C++ reader options.
        """
        cdef CParquetFileFormatReaderOptions* options
        options = &self.parquet_format.reader_options
        parquet_read_options = ParquetReadOptions(
            dictionary_columns={frombytes(col)
                                for col in options.dict_columns},
        )
        # Read options getter/setter works with strings so setting
        # the private property which uses the C Type
        parquet_read_options._coerce_int96_timestamp_unit = \
            options.coerce_int96_timestamp_unit
        return parquet_read_options

    def make_write_options(self, **kwargs):
        """
        Create write options for this format, updated with ``kwargs``.

        Parameters
        ----------
        **kwargs : dict
            Passed to ``ParquetFileWriteOptions.update``.

        Returns
        -------
        pyarrow.dataset.FileWriteOptions
        """
        # Safeguard from calling make_write_options as a static class method
        if not isinstance(self, ParquetFileFormat):
            raise TypeError("make_write_options() should be called on "
                            "an instance of ParquetFileFormat")
        opts = FileFormat.make_write_options(self)
        (<ParquetFileWriteOptions> opts).update(**kwargs)
        return opts

    cdef _set_default_fragment_scan_options(self, FragmentScanOptions options):
        # Parquet scan options are stored on the C++ format directly;
        # any other type is delegated to the base class.
        if options.type_name == 'parquet':
            self.parquet_format.default_fragment_scan_options = options.wrapped
        else:
            super()._set_default_fragment_scan_options(options)

    def equals(self, ParquetFileFormat other):
        """
        Compare read options and default fragment scan options.

        Parameters
        ----------
        other : pyarrow.dataset.ParquetFileFormat

        Returns
        -------
        bool
        """
        return (
            self.read_options.equals(other.read_options) and
            self.default_fragment_scan_options ==
            other.default_fragment_scan_options
        )

    @property
    def default_extname(self):
        return "parquet"

    def __reduce__(self):
        return ParquetFileFormat, (self.read_options,
                                   self.default_fragment_scan_options)

    def __repr__(self):
        return f"<ParquetFileFormat read_options={self.read_options}>"

    def make_fragment(self, file, filesystem=None,
                      Expression partition_expression=None, row_groups=None, *, file_size=None):
        """
        Make a FileFragment from a given file.

        Parameters
        ----------
        file : file-like object, path-like or str
            The file or file path to make a fragment from.
        filesystem : Filesystem, optional
            If `filesystem` is given, `file` must be a string and specifies
            the path of the file to read from the filesystem.
        partition_expression : Expression, optional
            An expression that is guaranteed true for all rows in the fragment.  Allows
            fragment to be potentially skipped while scanning with a filter.
        row_groups : Iterable, optional
            The indices of the row groups to include. Duplicates are
            dropped and the indices are used in ascending order.
        file_size : int, optional
            The size of the file in bytes. Can improve performance with high-latency filesystems
            when file size needs to be known before reading.

        Returns
        -------
        fragment : Fragment
            The file fragment
        """
        cdef:
            vector[int] c_row_groups
        if partition_expression is None:
            partition_expression = _true
        if row_groups is None:
            return super().make_fragment(file, filesystem,
                                         partition_expression, file_size=file_size)

        c_source = _make_file_source(file, filesystem, file_size)
        # Sort after de-duplicating: a bare set iterates in an unspecified
        # order, which made the fragment's row-group order depend on hash
        # layout and differ from ParquetFileFragment.subset().
        c_row_groups = [<int> row_group
                        for row_group in sorted(set(row_groups))]

        c_fragment = <shared_ptr[CFragment]> GetResultValue(
            self.parquet_format.MakeFragment(move(c_source),
                                             partition_expression.unwrap(),
                                             <shared_ptr[CSchema]>nullptr,
                                             move(c_row_groups)))
        return Fragment.wrap(move(c_fragment))
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
class RowGroupInfo:
    """
    A wrapper class for RowGroup information

    Equality and hashing are based on the row group ``id`` only, so an
    instance compares equal to (and hashes like) its integer id.

    Parameters
    ----------
    id : integer
        The group ID.
    metadata : FileMetaData
        The rowgroup metadata.
    schema : Schema
        Schema of the rows.
    """

    def __init__(self, id, metadata, schema):
        self.id = id
        self.metadata = metadata
        self.schema = schema

    @property
    def num_rows(self):
        """Number of rows, as reported by the row group metadata."""
        return self.metadata.num_rows

    @property
    def total_byte_size(self):
        """Total byte size, as reported by the row group metadata."""
        return self.metadata.total_byte_size

    @property
    def statistics(self):
        """
        Mapping of column path to ``{'min': ..., 'max': ...}``.

        Columns without min/max statistics, or whose path is not a field
        of ``schema``, are left out.
        """
        def name_stats(i):
            col = self.metadata.column(i)

            stats = col.statistics
            if stats is None or not stats.has_min_max:
                return None, None

            name = col.path_in_schema
            field_index = self.schema.get_field_index(name)
            if field_index < 0:
                return None, None

            # Convert min/max through the schema's field type so they come
            # back as plain Python values of the right type.
            typ = self.schema.field(field_index).type
            return name, {
                'min': pa.scalar(stats.min, type=typ).as_py(),
                'max': pa.scalar(stats.max, type=typ).as_py()
            }

        return {
            name: stats for name, stats
            in map(name_stats, range(self.metadata.num_columns))
            if stats is not None
        }

    def __repr__(self):
        return "RowGroupInfo({})".format(self.id)

    def __eq__(self, other):
        if isinstance(other, int):
            return self.id == other
        if not isinstance(other, RowGroupInfo):
            return False
        return self.id == other.id

    def __hash__(self):
        # Defining __eq__ alone would set __hash__ to None and make
        # instances unhashable; hash on the same key __eq__ compares.
        return hash(self.id)
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
cdef class ParquetFileFragment(FileFragment):
    """A Fragment backed by a single parquet file."""

    cdef:
        # Borrowed, typed view of the fragment handed to init().
        CParquetFileFragment* parquet_file_fragment

    cdef void init(self, const shared_ptr[CFragment]& sp):
        # Base-class setup first, then cache the Parquet-typed pointer.
        FileFragment.init(self, sp)
        self.parquet_file_fragment = <CParquetFileFragment*> sp.get()

    def __reduce__(self):
        buffer = self.buffer
        # parquet_file_fragment.row_groups() is empty if the metadata
        # information of the file is not yet populated
        if bool(self.parquet_file_fragment.row_groups()):
            row_groups = [info.id for info in self.row_groups]
        else:
            row_groups = None

        return self.format.make_fragment, (
            self.path if buffer is None else buffer,
            self.filesystem,
            self.partition_expression,
            row_groups
        )

    def ensure_complete_metadata(self):
        """
        Make sure every piece of metadata (statistics, physical schema, ...)
        has been read and cached on this fragment.
        """
        with nogil:
            check_status(self.parquet_file_fragment.EnsureCompleteMetadata())

    @property
    def row_groups(self):
        cdef vector[int] c_ids
        # Reading the metadata first forces it to be loaded; only then is
        # the row-group id list fetched.
        file_metadata = self.metadata
        c_ids = self.parquet_file_fragment.row_groups()
        return [RowGroupInfo(i, file_metadata.row_group(i),
                             self.physical_schema)
                for i in c_ids]

    @property
    def metadata(self):
        cdef FileMetaData file_metadata
        self.ensure_complete_metadata()
        file_metadata = FileMetaData()
        file_metadata.init(self.parquet_file_fragment.metadata())
        return file_metadata

    @property
    def num_row_groups(self):
        """
        Number of row groups this fragment views, which may be fewer than
        the number of row groups in the origin file.
        """
        self.ensure_complete_metadata()
        return self.parquet_file_fragment.row_groups().size()

    def split_by_row_group(self, Expression filter=None,
                           Schema schema=None):
        """
        Split the fragment into one fragment per row group.

        Each returned Fragment wraps a single row group of this
        ParquetFileFragment. Row groups whose metadata contradicts the
        optional filter are excluded.

        Parameters
        ----------
        filter : Expression, default None
            Only include the row groups which satisfy this predicate (using
            the Parquet RowGroup statistics).
        schema : Schema, default None
            Schema to use when filtering row groups. Defaults to the
            Fragment's physical schema

        Returns
        -------
        A list of Fragments
        """
        cdef:
            vector[shared_ptr[CFragment]] c_fragments
            CExpression c_filter
            shared_ptr[CFragment] c_fragment

        schema = schema or self.physical_schema
        c_filter = _bind(filter, schema)
        with nogil:
            c_fragments = move(GetResultValue(
                self.parquet_file_fragment.SplitByRowGroup(move(c_filter))))

        return [Fragment.wrap(c_fragment) for c_fragment in c_fragments]

    def subset(self, Expression filter=None, Schema schema=None,
               object row_group_ids=None):
        """
        Create a fragment that views only some of this fragment's row groups.

        The selection is given either as a filter predicate (with optional
        schema) or as a list of row group IDs, never both. With a filter,
        the resulting fragment can be empty (viewing no row groups).

        Parameters
        ----------
        filter : Expression, default None
            Only include the row groups which satisfy this predicate (using
            the Parquet RowGroup statistics).
        schema : Schema, default None
            Schema to use when filtering row groups. Defaults to the
            Fragment's physical schema
        row_group_ids : list of ints
            The row group IDs to include in the subset. Can only be specified
            if `filter` is None.

        Returns
        -------
        ParquetFileFragment
        """
        cdef:
            CExpression c_filter
            vector[int] c_row_group_ids
            shared_ptr[CFragment] c_fragment

        # Exactly one of the two selectors has to be supplied.
        if filter is not None and row_group_ids is not None:
            raise ValueError(
                "Cannot specify both 'filter' and 'row_group_ids'."
            )
        if filter is None and row_group_ids is None:
            raise ValueError(
                "Need to specify one of 'filter' or 'row_group_ids'"
            )

        if filter is not None:
            schema = schema or self.physical_schema
            c_filter = _bind(filter, schema)
            with nogil:
                c_fragment = move(GetResultValue(
                    self.parquet_file_fragment.SubsetWithFilter(
                        move(c_filter))))
        else:
            # De-duplicated ids, in ascending order.
            c_row_group_ids = [
                <int> row_group for row_group in sorted(set(row_group_ids))
            ]
            with nogil:
                c_fragment = move(GetResultValue(
                    self.parquet_file_fragment.SubsetWithIds(
                        move(c_row_group_ids))))

        return Fragment.wrap(c_fragment)
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
cdef class ParquetReadOptions(_Weakrefable):
    """
    Options for reading that are specific to the Parquet format.

    Parameters
    ----------
    dictionary_columns : list of string, default None
        Names of columns which should be dictionary encoded as
        they are read
    coerce_int96_timestamp_unit : str, default None
        Cast timestamps that are stored in INT96 format to a particular
        resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
        and therefore INT96 timestamps will be inferred as timestamps
        in nanoseconds
    """

    cdef public:
        set dictionary_columns
        TimeUnit _coerce_int96_timestamp_unit

    # Also see _PARQUET_READ_OPTIONS
    def __init__(self, dictionary_columns=None,
                 coerce_int96_timestamp_unit=None):
        # Always store an independent set; a falsy argument means "none".
        if dictionary_columns:
            self.dictionary_columns = set(dictionary_columns)
        else:
            self.dictionary_columns = set()
        # Goes through the property setter, which maps None to nanoseconds.
        self.coerce_int96_timestamp_unit = coerce_int96_timestamp_unit

    @property
    def coerce_int96_timestamp_unit(self):
        return timeunit_to_string(self._coerce_int96_timestamp_unit)

    @coerce_int96_timestamp_unit.setter
    def coerce_int96_timestamp_unit(self, unit):
        if unit is None:
            self._coerce_int96_timestamp_unit = TimeUnit_NANO
        else:
            self._coerce_int96_timestamp_unit = string_to_timeunit(unit)

    def equals(self, ParquetReadOptions other):
        """
        Compare dictionary columns and the INT96 coercion unit.

        Parameters
        ----------
        other : pyarrow.dataset.ParquetReadOptions

        Returns
        -------
        bool
        """
        if self.dictionary_columns != other.dictionary_columns:
            return False
        return (self.coerce_int96_timestamp_unit ==
                other.coerce_int96_timestamp_unit)

    def __eq__(self, other):
        # equals() raises TypeError for a non-ParquetReadOptions argument;
        # that is reported as "not equal".
        try:
            return self.equals(other)
        except TypeError:
            return False

    def __repr__(self):
        return (
            f"<ParquetReadOptions"
            f" dictionary_columns={self.dictionary_columns}"
            f" coerce_int96_timestamp_unit={self.coerce_int96_timestamp_unit}>"
        )
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
cdef class ParquetFileWriteOptions(FileWriteOptions):
|
| 566 |
+
|
| 567 |
+
def update(self, **kwargs):
|
| 568 |
+
"""
|
| 569 |
+
Parameters
|
| 570 |
+
----------
|
| 571 |
+
**kwargs : dict
|
| 572 |
+
"""
|
| 573 |
+
arrow_fields = {
|
| 574 |
+
"use_deprecated_int96_timestamps",
|
| 575 |
+
"coerce_timestamps",
|
| 576 |
+
"allow_truncated_timestamps",
|
| 577 |
+
"use_compliant_nested_type",
|
| 578 |
+
}
|
| 579 |
+
|
| 580 |
+
setters = set()
|
| 581 |
+
for name, value in kwargs.items():
|
| 582 |
+
if name not in self._properties:
|
| 583 |
+
raise TypeError("unexpected parquet write option: " + name)
|
| 584 |
+
self._properties[name] = value
|
| 585 |
+
if name in arrow_fields:
|
| 586 |
+
setters.add(self._set_arrow_properties)
|
| 587 |
+
elif name == "encryption_config" and value is not None:
|
| 588 |
+
setters.add(self._set_encryption_config)
|
| 589 |
+
else:
|
| 590 |
+
setters.add(self._set_properties)
|
| 591 |
+
|
| 592 |
+
for setter in setters:
|
| 593 |
+
setter()
|
| 594 |
+
|
| 595 |
+
def _set_properties(self):
|
| 596 |
+
cdef CParquetFileWriteOptions* opts = self.parquet_options
|
| 597 |
+
|
| 598 |
+
opts.writer_properties = _create_writer_properties(
|
| 599 |
+
use_dictionary=self._properties["use_dictionary"],
|
| 600 |
+
compression=self._properties["compression"],
|
| 601 |
+
version=self._properties["version"],
|
| 602 |
+
write_statistics=self._properties["write_statistics"],
|
| 603 |
+
data_page_size=self._properties["data_page_size"],
|
| 604 |
+
compression_level=self._properties["compression_level"],
|
| 605 |
+
use_byte_stream_split=(
|
| 606 |
+
self._properties["use_byte_stream_split"]
|
| 607 |
+
),
|
| 608 |
+
column_encoding=self._properties["column_encoding"],
|
| 609 |
+
data_page_version=self._properties["data_page_version"],
|
| 610 |
+
encryption_properties=self._properties["encryption_properties"],
|
| 611 |
+
write_batch_size=self._properties["write_batch_size"],
|
| 612 |
+
dictionary_pagesize_limit=self._properties["dictionary_pagesize_limit"],
|
| 613 |
+
write_page_index=self._properties["write_page_index"],
|
| 614 |
+
write_page_checksum=self._properties["write_page_checksum"],
|
| 615 |
+
sorting_columns=self._properties["sorting_columns"],
|
| 616 |
+
store_decimal_as_integer=self._properties["store_decimal_as_integer"],
|
| 617 |
+
)
|
| 618 |
+
|
| 619 |
+
def _set_arrow_properties(self):
|
| 620 |
+
cdef CParquetFileWriteOptions* opts = self.parquet_options
|
| 621 |
+
|
| 622 |
+
opts.arrow_writer_properties = _create_arrow_writer_properties(
|
| 623 |
+
use_deprecated_int96_timestamps=(
|
| 624 |
+
self._properties["use_deprecated_int96_timestamps"]
|
| 625 |
+
),
|
| 626 |
+
coerce_timestamps=self._properties["coerce_timestamps"],
|
| 627 |
+
allow_truncated_timestamps=(
|
| 628 |
+
self._properties["allow_truncated_timestamps"]
|
| 629 |
+
),
|
| 630 |
+
writer_engine_version="V2",
|
| 631 |
+
use_compliant_nested_type=(
|
| 632 |
+
self._properties["use_compliant_nested_type"]
|
| 633 |
+
)
|
| 634 |
+
)
|
| 635 |
+
|
| 636 |
+
def _set_encryption_config(self):
|
| 637 |
+
if not parquet_encryption_enabled:
|
| 638 |
+
raise NotImplementedError(
|
| 639 |
+
"Encryption is not enabled in your installation of pyarrow, but an "
|
| 640 |
+
"encryption_config was provided."
|
| 641 |
+
)
|
| 642 |
+
set_encryption_config(self, self._properties["encryption_config"])
|
| 643 |
+
|
| 644 |
+
cdef void init(self, const shared_ptr[CFileWriteOptions]& sp):
|
| 645 |
+
FileWriteOptions.init(self, sp)
|
| 646 |
+
self.parquet_options = <CParquetFileWriteOptions*> sp.get()
|
| 647 |
+
self._properties = dict(
|
| 648 |
+
use_dictionary=True,
|
| 649 |
+
compression="snappy",
|
| 650 |
+
version="2.6",
|
| 651 |
+
write_statistics=None,
|
| 652 |
+
data_page_size=None,
|
| 653 |
+
compression_level=None,
|
| 654 |
+
use_byte_stream_split=False,
|
| 655 |
+
column_encoding=None,
|
| 656 |
+
data_page_version="1.0",
|
| 657 |
+
use_deprecated_int96_timestamps=False,
|
| 658 |
+
coerce_timestamps=None,
|
| 659 |
+
allow_truncated_timestamps=False,
|
| 660 |
+
use_compliant_nested_type=True,
|
| 661 |
+
encryption_properties=None,
|
| 662 |
+
write_batch_size=None,
|
| 663 |
+
dictionary_pagesize_limit=None,
|
| 664 |
+
write_page_index=False,
|
| 665 |
+
encryption_config=None,
|
| 666 |
+
write_page_checksum=False,
|
| 667 |
+
sorting_columns=None,
|
| 668 |
+
store_decimal_as_integer=False,
|
| 669 |
+
)
|
| 670 |
+
|
| 671 |
+
self._set_properties()
|
| 672 |
+
self._set_arrow_properties()
|
| 673 |
+
|
| 674 |
+
def __repr__(self):
|
| 675 |
+
return "<pyarrow.dataset.ParquetFileWriteOptions {0}>".format(
|
| 676 |
+
" ".join([f"{key}={value}" for key, value in self._properties.items()])
|
| 677 |
+
)
|
| 678 |
+
|
| 679 |
+
|
| 680 |
+
cdef set _PARQUET_READ_OPTIONS = {
|
| 681 |
+
'dictionary_columns', 'coerce_int96_timestamp_unit'
|
| 682 |
+
}
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
cdef class ParquetFragmentScanOptions(FragmentScanOptions):
|
| 686 |
+
"""
|
| 687 |
+
Scan-specific options for Parquet fragments.
|
| 688 |
+
|
| 689 |
+
Parameters
|
| 690 |
+
----------
|
| 691 |
+
use_buffered_stream : bool, default False
|
| 692 |
+
Read files through buffered input streams rather than loading entire
|
| 693 |
+
row groups at once. This may be enabled to reduce memory overhead.
|
| 694 |
+
Disabled by default.
|
| 695 |
+
buffer_size : int, default 8192
|
| 696 |
+
Size of buffered stream, if enabled. Default is 8KB.
|
| 697 |
+
pre_buffer : bool, default True
|
| 698 |
+
If enabled, pre-buffer the raw Parquet data instead of issuing one
|
| 699 |
+
read per column chunk. This can improve performance on high-latency
|
| 700 |
+
filesystems (e.g. S3, GCS) by coalescing and issuing file reads in
|
| 701 |
+
parallel using a background I/O thread pool.
|
| 702 |
+
Set to False if you want to prioritize minimal memory usage
|
| 703 |
+
over maximum speed.
|
| 704 |
+
cache_options : pyarrow.CacheOptions, default None
|
| 705 |
+
Cache options used when pre_buffer is enabled. The default values should
|
| 706 |
+
be good for most use cases. You may want to adjust these for example if
|
| 707 |
+
you have exceptionally high latency to the file system.
|
| 708 |
+
thrift_string_size_limit : int, default None
|
| 709 |
+
If not None, override the maximum total string size allocated
|
| 710 |
+
when decoding Thrift structures. The default limit should be
|
| 711 |
+
sufficient for most Parquet files.
|
| 712 |
+
thrift_container_size_limit : int, default None
|
| 713 |
+
If not None, override the maximum total size of containers allocated
|
| 714 |
+
when decoding Thrift structures. The default limit should be
|
| 715 |
+
sufficient for most Parquet files.
|
| 716 |
+
decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None
|
| 717 |
+
If not None, use the provided ParquetDecryptionConfig to decrypt the
|
| 718 |
+
Parquet file.
|
| 719 |
+
decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None
|
| 720 |
+
If not None, use the provided FileDecryptionProperties to decrypt encrypted
|
| 721 |
+
Parquet file.
|
| 722 |
+
page_checksum_verification : bool, default False
|
| 723 |
+
If True, verify the page checksum for each page read from the file.
|
| 724 |
+
"""
|
| 725 |
+
|
| 726 |
+
# Avoid mistakingly creating attributes
|
| 727 |
+
__slots__ = ()
|
| 728 |
+
|
| 729 |
+
def __init__(self, *, bint use_buffered_stream=False,
|
| 730 |
+
buffer_size=8192,
|
| 731 |
+
bint pre_buffer=True,
|
| 732 |
+
cache_options=None,
|
| 733 |
+
thrift_string_size_limit=None,
|
| 734 |
+
thrift_container_size_limit=None,
|
| 735 |
+
decryption_config=None,
|
| 736 |
+
decryption_properties=None,
|
| 737 |
+
bint page_checksum_verification=False):
|
| 738 |
+
self.init(shared_ptr[CFragmentScanOptions](
|
| 739 |
+
new CParquetFragmentScanOptions()))
|
| 740 |
+
self.use_buffered_stream = use_buffered_stream
|
| 741 |
+
self.buffer_size = buffer_size
|
| 742 |
+
if pre_buffer and not is_threading_enabled():
|
| 743 |
+
pre_buffer = False
|
| 744 |
+
self.pre_buffer = pre_buffer
|
| 745 |
+
if cache_options is not None:
|
| 746 |
+
self.cache_options = cache_options
|
| 747 |
+
if thrift_string_size_limit is not None:
|
| 748 |
+
self.thrift_string_size_limit = thrift_string_size_limit
|
| 749 |
+
if thrift_container_size_limit is not None:
|
| 750 |
+
self.thrift_container_size_limit = thrift_container_size_limit
|
| 751 |
+
if decryption_config is not None:
|
| 752 |
+
self.parquet_decryption_config = decryption_config
|
| 753 |
+
if decryption_properties is not None:
|
| 754 |
+
self.decryption_properties = decryption_properties
|
| 755 |
+
self.page_checksum_verification = page_checksum_verification
|
| 756 |
+
|
| 757 |
+
cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp):
|
| 758 |
+
FragmentScanOptions.init(self, sp)
|
| 759 |
+
self.parquet_options = <CParquetFragmentScanOptions*> sp.get()
|
| 760 |
+
|
| 761 |
+
cdef CReaderProperties* reader_properties(self):
|
| 762 |
+
return self.parquet_options.reader_properties.get()
|
| 763 |
+
|
| 764 |
+
cdef ArrowReaderProperties* arrow_reader_properties(self):
|
| 765 |
+
return self.parquet_options.arrow_reader_properties.get()
|
| 766 |
+
|
| 767 |
+
@property
|
| 768 |
+
def use_buffered_stream(self):
|
| 769 |
+
return self.reader_properties().is_buffered_stream_enabled()
|
| 770 |
+
|
| 771 |
+
@use_buffered_stream.setter
|
| 772 |
+
def use_buffered_stream(self, bint use_buffered_stream):
|
| 773 |
+
if use_buffered_stream:
|
| 774 |
+
self.reader_properties().enable_buffered_stream()
|
| 775 |
+
else:
|
| 776 |
+
self.reader_properties().disable_buffered_stream()
|
| 777 |
+
|
| 778 |
+
@property
|
| 779 |
+
def buffer_size(self):
|
| 780 |
+
return self.reader_properties().buffer_size()
|
| 781 |
+
|
| 782 |
+
@buffer_size.setter
|
| 783 |
+
def buffer_size(self, buffer_size):
|
| 784 |
+
if buffer_size <= 0:
|
| 785 |
+
raise ValueError("Buffer size must be larger than zero")
|
| 786 |
+
self.reader_properties().set_buffer_size(buffer_size)
|
| 787 |
+
|
| 788 |
+
@property
|
| 789 |
+
def pre_buffer(self):
|
| 790 |
+
return self.arrow_reader_properties().pre_buffer()
|
| 791 |
+
|
| 792 |
+
@pre_buffer.setter
|
| 793 |
+
def pre_buffer(self, bint pre_buffer):
|
| 794 |
+
if pre_buffer and not is_threading_enabled():
|
| 795 |
+
return
|
| 796 |
+
self.arrow_reader_properties().set_pre_buffer(pre_buffer)
|
| 797 |
+
|
| 798 |
+
@property
|
| 799 |
+
def cache_options(self):
|
| 800 |
+
return CacheOptions.wrap(self.arrow_reader_properties().cache_options())
|
| 801 |
+
|
| 802 |
+
@cache_options.setter
|
| 803 |
+
def cache_options(self, CacheOptions options):
|
| 804 |
+
self.arrow_reader_properties().set_cache_options(options.unwrap())
|
| 805 |
+
|
| 806 |
+
@property
|
| 807 |
+
def thrift_string_size_limit(self):
|
| 808 |
+
return self.reader_properties().thrift_string_size_limit()
|
| 809 |
+
|
| 810 |
+
@thrift_string_size_limit.setter
|
| 811 |
+
def thrift_string_size_limit(self, size):
|
| 812 |
+
if size <= 0:
|
| 813 |
+
raise ValueError("size must be larger than zero")
|
| 814 |
+
self.reader_properties().set_thrift_string_size_limit(size)
|
| 815 |
+
|
| 816 |
+
@property
|
| 817 |
+
def thrift_container_size_limit(self):
|
| 818 |
+
return self.reader_properties().thrift_container_size_limit()
|
| 819 |
+
|
| 820 |
+
@thrift_container_size_limit.setter
|
| 821 |
+
def thrift_container_size_limit(self, size):
|
| 822 |
+
if size <= 0:
|
| 823 |
+
raise ValueError("size must be larger than zero")
|
| 824 |
+
self.reader_properties().set_thrift_container_size_limit(size)
|
| 825 |
+
|
| 826 |
+
@property
|
| 827 |
+
def decryption_properties(self):
|
| 828 |
+
if not parquet_encryption_enabled:
|
| 829 |
+
raise NotImplementedError(
|
| 830 |
+
"Unable to access encryption features. "
|
| 831 |
+
"Encryption is not enabled in your installation of pyarrow."
|
| 832 |
+
)
|
| 833 |
+
return self._decryption_properties
|
| 834 |
+
|
| 835 |
+
@decryption_properties.setter
|
| 836 |
+
def decryption_properties(self, config):
|
| 837 |
+
if not parquet_encryption_enabled:
|
| 838 |
+
raise NotImplementedError(
|
| 839 |
+
"Encryption is not enabled in your installation of pyarrow, but "
|
| 840 |
+
"decryption_properties were provided."
|
| 841 |
+
)
|
| 842 |
+
set_decryption_properties(self, config)
|
| 843 |
+
self._decryption_properties = config
|
| 844 |
+
|
| 845 |
+
@property
|
| 846 |
+
def parquet_decryption_config(self):
|
| 847 |
+
if not parquet_encryption_enabled:
|
| 848 |
+
raise NotImplementedError(
|
| 849 |
+
"Unable to access encryption features. "
|
| 850 |
+
"Encryption is not enabled in your installation of pyarrow."
|
| 851 |
+
)
|
| 852 |
+
return self._parquet_decryption_config
|
| 853 |
+
|
| 854 |
+
@parquet_decryption_config.setter
|
| 855 |
+
def parquet_decryption_config(self, config):
|
| 856 |
+
if not parquet_encryption_enabled:
|
| 857 |
+
raise NotImplementedError(
|
| 858 |
+
"Encryption is not enabled in your installation of pyarrow, but a "
|
| 859 |
+
"decryption_config was provided."
|
| 860 |
+
)
|
| 861 |
+
set_decryption_config(self, config)
|
| 862 |
+
self._parquet_decryption_config = config
|
| 863 |
+
|
| 864 |
+
@property
|
| 865 |
+
def page_checksum_verification(self):
|
| 866 |
+
return self.reader_properties().page_checksum_verification()
|
| 867 |
+
|
| 868 |
+
@page_checksum_verification.setter
|
| 869 |
+
def page_checksum_verification(self, bint page_checksum_verification):
|
| 870 |
+
self.reader_properties().set_page_checksum_verification(page_checksum_verification)
|
| 871 |
+
|
| 872 |
+
def equals(self, ParquetFragmentScanOptions other):
|
| 873 |
+
"""
|
| 874 |
+
Parameters
|
| 875 |
+
----------
|
| 876 |
+
other : pyarrow.dataset.ParquetFragmentScanOptions
|
| 877 |
+
|
| 878 |
+
Returns
|
| 879 |
+
-------
|
| 880 |
+
bool
|
| 881 |
+
"""
|
| 882 |
+
attrs = (
|
| 883 |
+
self.use_buffered_stream, self.buffer_size, self.pre_buffer, self.cache_options,
|
| 884 |
+
self.thrift_string_size_limit, self.thrift_container_size_limit,
|
| 885 |
+
self.page_checksum_verification)
|
| 886 |
+
other_attrs = (
|
| 887 |
+
other.use_buffered_stream, other.buffer_size, other.pre_buffer, other.cache_options,
|
| 888 |
+
other.thrift_string_size_limit,
|
| 889 |
+
other.thrift_container_size_limit, other.page_checksum_verification)
|
| 890 |
+
return attrs == other_attrs
|
| 891 |
+
|
| 892 |
+
@staticmethod
|
| 893 |
+
@binding(True) # Required for Cython < 3
|
| 894 |
+
def _reconstruct(kwargs):
|
| 895 |
+
# __reduce__ doesn't allow passing named arguments directly to the
|
| 896 |
+
# reconstructor, hence this wrapper.
|
| 897 |
+
return ParquetFragmentScanOptions(**kwargs)
|
| 898 |
+
|
| 899 |
+
def __reduce__(self):
|
| 900 |
+
kwargs = dict(
|
| 901 |
+
use_buffered_stream=self.use_buffered_stream,
|
| 902 |
+
buffer_size=self.buffer_size,
|
| 903 |
+
pre_buffer=self.pre_buffer,
|
| 904 |
+
cache_options=self.cache_options,
|
| 905 |
+
thrift_string_size_limit=self.thrift_string_size_limit,
|
| 906 |
+
thrift_container_size_limit=self.thrift_container_size_limit,
|
| 907 |
+
page_checksum_verification=self.page_checksum_verification
|
| 908 |
+
)
|
| 909 |
+
return ParquetFragmentScanOptions._reconstruct, (kwargs,)
|
| 910 |
+
|
| 911 |
+
|
| 912 |
+
cdef class ParquetFactoryOptions(_Weakrefable):
|
| 913 |
+
"""
|
| 914 |
+
Influences the discovery of parquet dataset.
|
| 915 |
+
|
| 916 |
+
Parameters
|
| 917 |
+
----------
|
| 918 |
+
partition_base_dir : str, optional
|
| 919 |
+
For the purposes of applying the partitioning, paths will be
|
| 920 |
+
stripped of the partition_base_dir. Files not matching the
|
| 921 |
+
partition_base_dir prefix will be skipped for partitioning discovery.
|
| 922 |
+
The ignored files will still be part of the Dataset, but will not
|
| 923 |
+
have partition information.
|
| 924 |
+
partitioning : Partitioning, PartitioningFactory, optional
|
| 925 |
+
The partitioning scheme applied to fragments, see ``Partitioning``.
|
| 926 |
+
validate_column_chunk_paths : bool, default False
|
| 927 |
+
Assert that all ColumnChunk paths are consistent. The parquet spec
|
| 928 |
+
allows for ColumnChunk data to be stored in multiple files, but
|
| 929 |
+
ParquetDatasetFactory supports only a single file with all ColumnChunk
|
| 930 |
+
data. If this flag is set construction of a ParquetDatasetFactory will
|
| 931 |
+
raise an error if ColumnChunk data is not resident in a single file.
|
| 932 |
+
"""
|
| 933 |
+
|
| 934 |
+
cdef:
|
| 935 |
+
CParquetFactoryOptions options
|
| 936 |
+
|
| 937 |
+
__slots__ = () # avoid mistakingly creating attributes
|
| 938 |
+
|
| 939 |
+
def __init__(self, partition_base_dir=None, partitioning=None,
|
| 940 |
+
validate_column_chunk_paths=False):
|
| 941 |
+
if isinstance(partitioning, PartitioningFactory):
|
| 942 |
+
self.partitioning_factory = partitioning
|
| 943 |
+
elif isinstance(partitioning, Partitioning):
|
| 944 |
+
self.partitioning = partitioning
|
| 945 |
+
|
| 946 |
+
if partition_base_dir is not None:
|
| 947 |
+
self.partition_base_dir = partition_base_dir
|
| 948 |
+
|
| 949 |
+
self.options.validate_column_chunk_paths = validate_column_chunk_paths
|
| 950 |
+
|
| 951 |
+
cdef inline CParquetFactoryOptions unwrap(self):
|
| 952 |
+
return self.options
|
| 953 |
+
|
| 954 |
+
@property
|
| 955 |
+
def partitioning(self):
|
| 956 |
+
"""Partitioning to apply to discovered files.
|
| 957 |
+
|
| 958 |
+
NOTE: setting this property will overwrite partitioning_factory.
|
| 959 |
+
"""
|
| 960 |
+
c_partitioning = self.options.partitioning.partitioning()
|
| 961 |
+
if c_partitioning.get() == nullptr:
|
| 962 |
+
return None
|
| 963 |
+
return Partitioning.wrap(c_partitioning)
|
| 964 |
+
|
| 965 |
+
@partitioning.setter
|
| 966 |
+
def partitioning(self, Partitioning value):
|
| 967 |
+
self.options.partitioning = (<Partitioning> value).unwrap()
|
| 968 |
+
|
| 969 |
+
@property
|
| 970 |
+
def partitioning_factory(self):
|
| 971 |
+
"""PartitioningFactory to apply to discovered files and
|
| 972 |
+
discover a Partitioning.
|
| 973 |
+
|
| 974 |
+
NOTE: setting this property will overwrite partitioning.
|
| 975 |
+
"""
|
| 976 |
+
c_factory = self.options.partitioning.factory()
|
| 977 |
+
if c_factory.get() == nullptr:
|
| 978 |
+
return None
|
| 979 |
+
return PartitioningFactory.wrap(c_factory, None, None)
|
| 980 |
+
|
| 981 |
+
@partitioning_factory.setter
|
| 982 |
+
def partitioning_factory(self, PartitioningFactory value):
|
| 983 |
+
self.options.partitioning = (<PartitioningFactory> value).unwrap()
|
| 984 |
+
|
| 985 |
+
@property
|
| 986 |
+
def partition_base_dir(self):
|
| 987 |
+
"""
|
| 988 |
+
Base directory to strip paths before applying the partitioning.
|
| 989 |
+
"""
|
| 990 |
+
return frombytes(self.options.partition_base_dir)
|
| 991 |
+
|
| 992 |
+
@partition_base_dir.setter
|
| 993 |
+
def partition_base_dir(self, value):
|
| 994 |
+
self.options.partition_base_dir = tobytes(value)
|
| 995 |
+
|
| 996 |
+
@property
|
| 997 |
+
def validate_column_chunk_paths(self):
|
| 998 |
+
"""
|
| 999 |
+
Base directory to strip paths before applying the partitioning.
|
| 1000 |
+
"""
|
| 1001 |
+
return self.options.validate_column_chunk_paths
|
| 1002 |
+
|
| 1003 |
+
@validate_column_chunk_paths.setter
|
| 1004 |
+
def validate_column_chunk_paths(self, value):
|
| 1005 |
+
self.options.validate_column_chunk_paths = value
|
| 1006 |
+
|
| 1007 |
+
|
| 1008 |
+
cdef class ParquetDatasetFactory(DatasetFactory):
|
| 1009 |
+
"""
|
| 1010 |
+
Create a ParquetDatasetFactory from a Parquet `_metadata` file.
|
| 1011 |
+
|
| 1012 |
+
Parameters
|
| 1013 |
+
----------
|
| 1014 |
+
metadata_path : str
|
| 1015 |
+
Path to the `_metadata` parquet metadata-only file generated with
|
| 1016 |
+
`pyarrow.parquet.write_metadata`.
|
| 1017 |
+
filesystem : pyarrow.fs.FileSystem
|
| 1018 |
+
Filesystem to read the metadata_path from, and subsequent parquet
|
| 1019 |
+
files.
|
| 1020 |
+
format : ParquetFileFormat
|
| 1021 |
+
Parquet format options.
|
| 1022 |
+
options : ParquetFactoryOptions, optional
|
| 1023 |
+
Various flags influencing the discovery of filesystem paths.
|
| 1024 |
+
"""
|
| 1025 |
+
|
| 1026 |
+
cdef:
|
| 1027 |
+
CParquetDatasetFactory* parquet_factory
|
| 1028 |
+
|
| 1029 |
+
def __init__(self, metadata_path, FileSystem filesystem not None,
|
| 1030 |
+
FileFormat format not None,
|
| 1031 |
+
ParquetFactoryOptions options=None):
|
| 1032 |
+
cdef:
|
| 1033 |
+
c_string c_path
|
| 1034 |
+
shared_ptr[CFileSystem] c_filesystem
|
| 1035 |
+
shared_ptr[CParquetFileFormat] c_format
|
| 1036 |
+
CResult[shared_ptr[CDatasetFactory]] result
|
| 1037 |
+
CParquetFactoryOptions c_options
|
| 1038 |
+
|
| 1039 |
+
c_path = tobytes(metadata_path)
|
| 1040 |
+
c_filesystem = filesystem.unwrap()
|
| 1041 |
+
c_format = static_pointer_cast[CParquetFileFormat, CFileFormat](
|
| 1042 |
+
format.unwrap())
|
| 1043 |
+
options = options or ParquetFactoryOptions()
|
| 1044 |
+
c_options = options.unwrap()
|
| 1045 |
+
|
| 1046 |
+
with nogil:
|
| 1047 |
+
result = CParquetDatasetFactory.MakeFromMetaDataPath(
|
| 1048 |
+
c_path, c_filesystem, c_format, c_options)
|
| 1049 |
+
self.init(GetResultValue(result))
|
| 1050 |
+
|
| 1051 |
+
cdef init(self, shared_ptr[CDatasetFactory]& sp):
|
| 1052 |
+
DatasetFactory.init(self, sp)
|
| 1053 |
+
self.parquet_factory = <CParquetDatasetFactory*> sp.get()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_fs.pxd
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: language_level = 3
|
| 19 |
+
|
| 20 |
+
from pyarrow.includes.common cimport *
|
| 21 |
+
from pyarrow.includes.libarrow_fs cimport *
|
| 22 |
+
from pyarrow.lib import _detect_compression, frombytes, tobytes
|
| 23 |
+
from pyarrow.lib cimport *
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
cpdef enum FileType:
|
| 27 |
+
NotFound = <int8_t> CFileType_NotFound
|
| 28 |
+
Unknown = <int8_t> CFileType_Unknown
|
| 29 |
+
File = <int8_t> CFileType_File
|
| 30 |
+
Directory = <int8_t> CFileType_Directory
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
cdef class FileInfo(_Weakrefable):
|
| 34 |
+
cdef:
|
| 35 |
+
CFileInfo info
|
| 36 |
+
|
| 37 |
+
@staticmethod
|
| 38 |
+
cdef wrap(CFileInfo info)
|
| 39 |
+
|
| 40 |
+
cdef inline CFileInfo unwrap(self) nogil
|
| 41 |
+
|
| 42 |
+
@staticmethod
|
| 43 |
+
cdef CFileInfo unwrap_safe(obj)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
cdef class FileSelector(_Weakrefable):
|
| 47 |
+
cdef:
|
| 48 |
+
CFileSelector selector
|
| 49 |
+
|
| 50 |
+
@staticmethod
|
| 51 |
+
cdef FileSelector wrap(CFileSelector selector)
|
| 52 |
+
|
| 53 |
+
cdef inline CFileSelector unwrap(self) nogil
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
cdef class FileSystem(_Weakrefable):
|
| 57 |
+
cdef:
|
| 58 |
+
shared_ptr[CFileSystem] wrapped
|
| 59 |
+
CFileSystem* fs
|
| 60 |
+
|
| 61 |
+
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
|
| 62 |
+
|
| 63 |
+
@staticmethod
|
| 64 |
+
cdef wrap(const shared_ptr[CFileSystem]& sp)
|
| 65 |
+
|
| 66 |
+
cdef inline shared_ptr[CFileSystem] unwrap(self) nogil
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
cdef class LocalFileSystem(FileSystem):
|
| 70 |
+
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
cdef class SubTreeFileSystem(FileSystem):
|
| 74 |
+
cdef:
|
| 75 |
+
CSubTreeFileSystem* subtreefs
|
| 76 |
+
|
| 77 |
+
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
cdef class _MockFileSystem(FileSystem):
|
| 81 |
+
cdef:
|
| 82 |
+
CMockFileSystem* mockfs
|
| 83 |
+
|
| 84 |
+
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
cdef class PyFileSystem(FileSystem):
|
| 88 |
+
cdef:
|
| 89 |
+
CPyFileSystem* pyfs
|
| 90 |
+
|
| 91 |
+
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_hdfs.pyx
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: language_level = 3
|
| 19 |
+
|
| 20 |
+
from cython cimport binding
|
| 21 |
+
|
| 22 |
+
from pyarrow.includes.common cimport *
|
| 23 |
+
from pyarrow.includes.libarrow cimport *
|
| 24 |
+
from pyarrow.includes.libarrow_fs cimport *
|
| 25 |
+
from pyarrow._fs cimport FileSystem
|
| 26 |
+
|
| 27 |
+
from pyarrow.lib import frombytes, tobytes
|
| 28 |
+
from pyarrow.util import _stringify_path
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
cdef class HadoopFileSystem(FileSystem):
|
| 32 |
+
"""
|
| 33 |
+
HDFS backed FileSystem implementation
|
| 34 |
+
|
| 35 |
+
Parameters
|
| 36 |
+
----------
|
| 37 |
+
host : str
|
| 38 |
+
HDFS host to connect to. Set to "default" for fs.defaultFS from
|
| 39 |
+
core-site.xml.
|
| 40 |
+
port : int, default 8020
|
| 41 |
+
HDFS port to connect to. Set to 0 for default or logical (HA) nodes.
|
| 42 |
+
user : str, default None
|
| 43 |
+
Username when connecting to HDFS; None implies login user.
|
| 44 |
+
replication : int, default 3
|
| 45 |
+
Number of copies each block will have.
|
| 46 |
+
buffer_size : int, default 0
|
| 47 |
+
If 0, no buffering will happen otherwise the size of the temporary read
|
| 48 |
+
and write buffer.
|
| 49 |
+
default_block_size : int, default None
|
| 50 |
+
None means the default configuration for HDFS, a typical block size is
|
| 51 |
+
128 MB.
|
| 52 |
+
kerb_ticket : string or path, default None
|
| 53 |
+
If not None, the path to the Kerberos ticket cache.
|
| 54 |
+
extra_conf : dict, default None
|
| 55 |
+
Extra key/value pairs for configuration; will override any
|
| 56 |
+
hdfs-site.xml properties.
|
| 57 |
+
|
| 58 |
+
Examples
|
| 59 |
+
--------
|
| 60 |
+
>>> from pyarrow import fs
|
| 61 |
+
>>> hdfs = fs.HadoopFileSystem(host, port, user=user, kerb_ticket=ticket_cache_path) # doctest: +SKIP
|
| 62 |
+
|
| 63 |
+
For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
|
| 64 |
+
"""
|
| 65 |
+
|
| 66 |
+
cdef:
|
| 67 |
+
CHadoopFileSystem* hdfs
|
| 68 |
+
|
| 69 |
+
def __init__(self, str host, int port=8020, *, str user=None,
|
| 70 |
+
int replication=3, int buffer_size=0,
|
| 71 |
+
default_block_size=None, kerb_ticket=None,
|
| 72 |
+
extra_conf=None):
|
| 73 |
+
cdef:
|
| 74 |
+
CHdfsOptions options
|
| 75 |
+
shared_ptr[CHadoopFileSystem] wrapped
|
| 76 |
+
|
| 77 |
+
if not host.startswith(('hdfs://', 'viewfs://')) and host != "default":
|
| 78 |
+
# TODO(kszucs): do more sanitization
|
| 79 |
+
host = 'hdfs://{}'.format(host)
|
| 80 |
+
|
| 81 |
+
options.ConfigureEndPoint(tobytes(host), int(port))
|
| 82 |
+
options.ConfigureReplication(replication)
|
| 83 |
+
options.ConfigureBufferSize(buffer_size)
|
| 84 |
+
|
| 85 |
+
if user is not None:
|
| 86 |
+
options.ConfigureUser(tobytes(user))
|
| 87 |
+
if default_block_size is not None:
|
| 88 |
+
options.ConfigureBlockSize(default_block_size)
|
| 89 |
+
if kerb_ticket is not None:
|
| 90 |
+
options.ConfigureKerberosTicketCachePath(
|
| 91 |
+
tobytes(_stringify_path(kerb_ticket)))
|
| 92 |
+
if extra_conf is not None:
|
| 93 |
+
for k, v in extra_conf.items():
|
| 94 |
+
options.ConfigureExtraConf(tobytes(k), tobytes(v))
|
| 95 |
+
|
| 96 |
+
with nogil:
|
| 97 |
+
wrapped = GetResultValue(CHadoopFileSystem.Make(options))
|
| 98 |
+
self.init(<shared_ptr[CFileSystem]> wrapped)
|
| 99 |
+
|
| 100 |
+
cdef init(self, const shared_ptr[CFileSystem]& wrapped):
|
| 101 |
+
FileSystem.init(self, wrapped)
|
| 102 |
+
self.hdfs = <CHadoopFileSystem*> wrapped.get()
|
| 103 |
+
|
| 104 |
+
@staticmethod
def from_uri(uri):
    """
    Create a HadoopFileSystem from a URI string.

    These two calls are equivalent:

    * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\
&replication=1')``
    * ``HadoopFileSystem('localhost', port=8020, user='test', \
replication=1)``

    Parameters
    ----------
    uri : str
        A string URI describing the connection to HDFS.
        The user, replication, buffer_size and default_block_size
        settings can be changed by passing them as query parts.

    Returns
    -------
    HadoopFileSystem
    """
    # The instance is allocated with __new__ so that __init__ (which
    # would build its own options) is bypassed.
    cdef HadoopFileSystem fs = HadoopFileSystem.__new__(HadoopFileSystem)
    cdef shared_ptr[CHadoopFileSystem] wrapped
    cdef CHdfsOptions options = GetResultValue(
        CHdfsOptions.FromUriString(tobytes(uri)))

    with nogil:
        wrapped = GetResultValue(CHadoopFileSystem.Make(options))

    fs.init(<shared_ptr[CFileSystem]> wrapped)
    return fs
|
| 138 |
+
|
| 139 |
+
@staticmethod
@binding(True)  # Required for cython < 3
def _reconstruct(kwargs):
    # Rebuild a HadoopFileSystem from the keyword dictionary that
    # __reduce__ places in the pickle payload.
    # __reduce__ doesn't allow passing named arguments directly to the
    # reconstructor, hence this wrapper.
    return HadoopFileSystem(**kwargs)
|
| 145 |
+
|
| 146 |
+
def __reduce__(self):
    # Pickle support: read the options back from the C++ filesystem and
    # express them as the keyword arguments accepted by __init__, to be
    # replayed through _reconstruct on unpickling.
    cdef CHdfsOptions opts = self.hdfs.options()
    kwargs = {
        'host': frombytes(opts.connection_config.host),
        'port': opts.connection_config.port,
        'user': frombytes(opts.connection_config.user),
        'replication': opts.replication,
        'buffer_size': opts.buffer_size,
        'default_block_size': opts.default_block_size,
        'kerb_ticket': frombytes(opts.connection_config.kerb_ticket),
        'extra_conf': {frombytes(k): frombytes(v)
                       for k, v in opts.connection_config.extra_conf},
    }
    return HadoopFileSystem._reconstruct, (kwargs,)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pxd
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: language_level = 3
|
| 19 |
+
|
| 20 |
+
from pyarrow.includes.libarrow cimport *
|
| 21 |
+
from pyarrow.lib cimport _Weakrefable
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
cdef class ParseOptions(_Weakrefable):
    # The C++ JSON parse options, held by value.
    cdef CJSONParseOptions options

    # Build a Python wrapper around an existing C++ options value.
    @staticmethod
    cdef ParseOptions wrap(CJSONParseOptions options)
|
| 30 |
+
|
| 31 |
+
cdef class ReadOptions(_Weakrefable):
    # The C++ JSON read options, held by value.
    cdef CJSONReadOptions options

    # Build a Python wrapper around an existing C++ options value.
    @staticmethod
    cdef ReadOptions wrap(CJSONReadOptions options)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pyx
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: profile=False
|
| 19 |
+
# distutils: language = c++
|
| 20 |
+
# cython: language_level = 3
|
| 21 |
+
|
| 22 |
+
from pyarrow.includes.common cimport *
|
| 23 |
+
from pyarrow.includes.libarrow cimport *
|
| 24 |
+
from pyarrow.lib cimport (_Weakrefable, MemoryPool,
|
| 25 |
+
maybe_unbox_memory_pool,
|
| 26 |
+
get_input_stream, pyarrow_wrap_table,
|
| 27 |
+
pyarrow_wrap_schema, pyarrow_unwrap_schema)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
cdef class ReadOptions(_Weakrefable):
    """
    Options for reading JSON files.

    Parameters
    ----------
    use_threads : bool, optional (default True)
        Whether to use multiple threads to accelerate reading.
    block_size : int, optional
        How many bytes to process at a time from the input stream.
        This determines the multi-threading granularity as well as
        the size of individual chunks in the Table.
    """

    # Empty __slots__ so that a typo cannot silently create an attribute.
    __slots__ = ()

    def __init__(self, use_threads=None, block_size=None):
        # Start from the C++ defaults; only arguments that were actually
        # given override them, via the property setters below.
        self.options = CJSONReadOptions.Defaults()
        if use_threads is not None:
            self.use_threads = use_threads
        if block_size is not None:
            self.block_size = block_size

    @property
    def use_threads(self):
        """
        Whether to use multiple threads to accelerate reading.
        """
        return self.options.use_threads

    @use_threads.setter
    def use_threads(self, value):
        self.options.use_threads = value

    @property
    def block_size(self):
        """
        How many bytes to process at a time from the input stream.

        This determines the multi-threading granularity as well as the
        size of individual chunks in the Table.
        """
        return self.options.block_size

    @block_size.setter
    def block_size(self, value):
        self.options.block_size = value

    def __reduce__(self):
        # Pickle as a constructor call with the current option values.
        state = (self.use_threads, self.block_size)
        return ReadOptions, state

    def equals(self, ReadOptions other):
        """
        Compare the option values of two ReadOptions objects.

        Parameters
        ----------
        other : pyarrow.json.ReadOptions

        Returns
        -------
        bool
        """
        same_threading = self.use_threads == other.use_threads
        return same_threading and self.block_size == other.block_size

    def __eq__(self, other):
        # A TypeError raised by equals() (for example when ``other`` is
        # not a ReadOptions) is reported as "not equal".
        try:
            result = self.equals(other)
        except TypeError:
            return False
        return result

    @staticmethod
    cdef ReadOptions wrap(CJSONReadOptions options):
        cdef ReadOptions wrapper = ReadOptions()
        wrapper.options = options  # shallow copy
        return wrapper
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
cdef class ParseOptions(_Weakrefable):
    """
    Options for parsing JSON files.

    Parameters
    ----------
    explicit_schema : Schema, optional (default None)
        Optional explicit schema (no type inference, ignores other fields).
    newlines_in_values : bool, optional (default False)
        Whether objects may be printed across multiple lines (for example
        pretty printed). If false, input must end with an empty line.
    unexpected_field_behavior : str, default "infer"
        How JSON fields outside of explicit_schema (if given) are treated.

        Possible behaviors:

        - "ignore": unexpected JSON fields are ignored
        - "error": error out on unexpected JSON fields
        - "infer": unexpected JSON fields are type-inferred and included in
          the output
    """

    __slots__ = ()

    def __init__(self, explicit_schema=None, newlines_in_values=None,
                 unexpected_field_behavior=None):
        # Start from the C++ defaults; only arguments that were actually
        # given override them, via the property setters below.
        self.options = CJSONParseOptions.Defaults()
        if explicit_schema is not None:
            self.explicit_schema = explicit_schema
        if newlines_in_values is not None:
            self.newlines_in_values = newlines_in_values
        if unexpected_field_behavior is not None:
            self.unexpected_field_behavior = unexpected_field_behavior

    def __reduce__(self):
        # Pickle as a constructor call with the current option values.
        state = (
            self.explicit_schema,
            self.newlines_in_values,
            self.unexpected_field_behavior,
        )
        return ParseOptions, state

    @property
    def explicit_schema(self):
        """
        Optional explicit schema (no type inference, ignores other fields)
        """
        if self.options.explicit_schema.get() != NULL:
            return pyarrow_wrap_schema(self.options.explicit_schema)
        return None

    @explicit_schema.setter
    def explicit_schema(self, value):
        self.options.explicit_schema = pyarrow_unwrap_schema(value)

    @property
    def newlines_in_values(self):
        """
        Whether newline characters are allowed in JSON values.
        Setting this to True reduces the performance of multi-threaded
        JSON reading.
        """
        return self.options.newlines_in_values

    @newlines_in_values.setter
    def newlines_in_values(self, value):
        self.options.newlines_in_values = value

    @property
    def unexpected_field_behavior(self):
        """
        How JSON fields outside of explicit_schema (if given) are treated.

        Possible behaviors:

        - "ignore": unexpected JSON fields are ignored
        - "error": error out on unexpected JSON fields
        - "infer": unexpected JSON fields are type-inferred and included in
          the output

        Set to "infer" by default.
        """
        current = self.options.unexpected_field_behavior
        if current == CUnexpectedFieldBehavior_Ignore:
            return "ignore"
        if current == CUnexpectedFieldBehavior_Error:
            return "error"
        if current == CUnexpectedFieldBehavior_InferType:
            return "infer"
        raise ValueError('Unexpected value for unexpected_field_behavior')

    @unexpected_field_behavior.setter
    def unexpected_field_behavior(self, value):
        cdef CUnexpectedFieldBehavior behavior

        # Map the user-facing name onto the C++ enum value.
        if value == "ignore":
            behavior = CUnexpectedFieldBehavior_Ignore
        elif value == "error":
            behavior = CUnexpectedFieldBehavior_Error
        elif value == "infer":
            behavior = CUnexpectedFieldBehavior_InferType
        else:
            raise ValueError(
                "Unexpected value `{}` for `unexpected_field_behavior`, pass "
                "either `ignore`, `error` or `infer`.".format(value)
            )

        self.options.unexpected_field_behavior = behavior

    def equals(self, ParseOptions other):
        """
        Compare the option values of two ParseOptions objects.

        Parameters
        ----------
        other : pyarrow.json.ParseOptions

        Returns
        -------
        bool
        """
        same_schema = self.explicit_schema == other.explicit_schema
        return (
            same_schema and
            self.newlines_in_values == other.newlines_in_values and
            self.unexpected_field_behavior == other.unexpected_field_behavior
        )

    def __eq__(self, other):
        # A TypeError raised by equals() (for example when ``other`` is
        # not a ParseOptions) is reported as "not equal".
        try:
            result = self.equals(other)
        except TypeError:
            return False
        return result

    @staticmethod
    cdef ParseOptions wrap(CJSONParseOptions options):
        cdef ParseOptions wrapper = ParseOptions()
        wrapper.options = options  # shallow copy
        return wrapper
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
cdef _get_reader(input_file, shared_ptr[CInputStream]* out):
    # Open ``input_file`` as an input stream and store it in ``out``;
    # memory mapping is never requested for JSON input.
    get_input_stream(input_file, False, out)
|
| 255 |
+
|
| 256 |
+
cdef _get_read_options(ReadOptions read_options, CJSONReadOptions* out):
    # Copy the wrapped C++ read options into ``out``; fall back to the
    # library defaults when no options object was given.
    if read_options is not None:
        out[0] = read_options.options
    else:
        out[0] = CJSONReadOptions.Defaults()
|
| 261 |
+
|
| 262 |
+
cdef _get_parse_options(ParseOptions parse_options, CJSONParseOptions* out):
    # Copy the wrapped C++ parse options into ``out``; fall back to the
    # library defaults when no options object was given.
    if parse_options is not None:
        out[0] = parse_options.options
    else:
        out[0] = CJSONParseOptions.Defaults()
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def read_json(input_file, read_options=None, parse_options=None,
              MemoryPool memory_pool=None):
    """
    Read a Table from a stream of JSON data.

    Parameters
    ----------
    input_file : str, path or file-like object
        The location of JSON data. Currently only the line-delimited JSON
        format is supported.
    read_options : pyarrow.json.ReadOptions, optional
        Options for the JSON reader (see ReadOptions constructor for defaults).
    parse_options : pyarrow.json.ParseOptions, optional
        Options for the JSON parser
        (see ParseOptions constructor for defaults).
    memory_pool : MemoryPool, optional
        Pool to allocate Table memory from.

    Returns
    -------
    :class:`pyarrow.Table`
        Contents of the JSON file as an in-memory table.
    """
    cdef shared_ptr[CInputStream] stream
    cdef CJSONReadOptions c_read_options
    cdef CJSONParseOptions c_parse_options
    cdef shared_ptr[CJSONReader] reader
    cdef shared_ptr[CTable] table

    # Resolve the Python-level arguments into their C++ counterparts.
    _get_reader(input_file, &stream)
    _get_read_options(read_options, &c_read_options)
    _get_parse_options(parse_options, &c_parse_options)

    reader = GetResultValue(CJSONReader.Make(
        maybe_unbox_memory_pool(memory_pool),
        stream, c_read_options, c_parse_options))

    # The read itself runs without holding the GIL.
    with nogil:
        table = GetResultValue(reader.get().Read())

    return pyarrow_wrap_table(table)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_orc.pyx
ADDED
|
@@ -0,0 +1,445 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: profile=False
|
| 19 |
+
# distutils: language = c++
|
| 20 |
+
|
| 21 |
+
from cython.operator cimport dereference as deref
|
| 22 |
+
from libcpp.vector cimport vector as std_vector
|
| 23 |
+
from libcpp.utility cimport move
|
| 24 |
+
from pyarrow.includes.common cimport *
|
| 25 |
+
from pyarrow.includes.libarrow cimport *
|
| 26 |
+
from pyarrow.lib cimport (check_status, _Weakrefable,
|
| 27 |
+
MemoryPool, maybe_unbox_memory_pool,
|
| 28 |
+
pyarrow_wrap_schema,
|
| 29 |
+
pyarrow_wrap_batch,
|
| 30 |
+
Table,
|
| 31 |
+
pyarrow_wrap_table,
|
| 32 |
+
pyarrow_wrap_metadata,
|
| 33 |
+
pyarrow_unwrap_table,
|
| 34 |
+
get_reader,
|
| 35 |
+
get_writer)
|
| 36 |
+
from pyarrow.lib import frombytes, tobytes
|
| 37 |
+
from pyarrow.util import _stringify_path
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
cdef compression_type_from_enum(CCompressionType compression_type):
    # Translate an Arrow compression enum value into the ORC compression
    # name; note that GZIP is reported under ORC's name 'ZLIB'.
    names = {
        CCompressionType_UNCOMPRESSED: 'UNCOMPRESSED',
        CCompressionType_GZIP: 'ZLIB',
        CCompressionType_SNAPPY: 'SNAPPY',
        CCompressionType_LZ4: 'LZ4',
        CCompressionType_ZSTD: 'ZSTD',
    }
    label = names.get(compression_type)
    if label is None:
        raise ValueError('Unsupported compression')
    return label
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
cdef CCompressionType compression_type_from_name(name) except *:
    # Translate a case-insensitive ORC compression name into the Arrow
    # compression enum. Raises TypeError for non-strings and ValueError
    # for names that are not recognised.
    if not isinstance(name, str):
        raise TypeError('compression must be a string')
    name = name.upper()
    if name == 'ZLIB':
        return CCompressionType_GZIP
    if name == 'SNAPPY':
        return CCompressionType_SNAPPY
    if name == 'LZ4':
        return CCompressionType_LZ4
    if name == 'ZSTD':
        return CCompressionType_ZSTD
    if name == 'UNCOMPRESSED':
        return CCompressionType_UNCOMPRESSED
    raise ValueError(f'Unknown CompressionKind: {name}')
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
cdef compression_strategy_from_enum(
    CompressionStrategy compression_strategy
):
    # Translate a compression strategy enum value into its name.
    names = {
        _CompressionStrategy_SPEED: 'SPEED',
        _CompressionStrategy_COMPRESSION: 'COMPRESSION',
    }
    label = names.get(compression_strategy)
    if label is None:
        raise ValueError('Unsupported compression strategy')
    return label
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
cdef CompressionStrategy compression_strategy_from_name(name) except *:
    # Translate a case-insensitive strategy name into the enum value.
    # Raises TypeError for non-strings and ValueError for unknown names.
    if not isinstance(name, str):
        raise TypeError('compression strategy must be a string')
    name = name.upper()
    if name == 'COMPRESSION':
        return _CompressionStrategy_COMPRESSION
    if name == 'SPEED':
        return _CompressionStrategy_SPEED
    raise ValueError(f'Unknown CompressionStrategy: {name}')
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
cdef file_version_from_class(FileVersion file_version):
    # Render the file version via its ToString() method and decode the
    # result to a Python str.
    version_bytes = file_version.ToString()
    return frombytes(version_bytes)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
cdef writer_id_from_enum(WriterId writer_id):
    # Translate an ORC writer id enum value into a short name.
    #
    # Ids without a known name yield 'UNKNOWN' rather than raising:
    # ORCReader.writer() checks for exactly that value in order to fall
    # back to the raw numeric id, a branch that could never be reached
    # while this function raised ValueError.
    writer_id_map = {
        _WriterId_ORC_JAVA_WRITER: 'ORC_JAVA',
        _WriterId_ORC_CPP_WRITER: 'ORC_CPP',
        _WriterId_PRESTO_WRITER: 'PRESTO',
        _WriterId_SCRITCHLEY_GO: 'SCRITCHLEY_GO',
        _WriterId_TRINO_WRITER: 'TRINO',
    }
    return writer_id_map.get(writer_id, 'UNKNOWN')
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
cdef writer_version_from_enum(WriterVersion writer_version):
    # Translate an ORC writer version enum value into its name.
    names = {
        _WriterVersion_ORIGINAL: 'ORIGINAL',
        _WriterVersion_HIVE_8732: 'HIVE_8732',
        _WriterVersion_HIVE_4243: 'HIVE_4243',
        _WriterVersion_HIVE_12055: 'HIVE_12055',
        _WriterVersion_HIVE_13083: 'HIVE_13083',
        _WriterVersion_ORC_101: 'ORC_101',
        _WriterVersion_ORC_135: 'ORC_135',
        _WriterVersion_ORC_517: 'ORC_517',
        _WriterVersion_ORC_203: 'ORC_203',
        _WriterVersion_ORC_14: 'ORC_14',
    }
    label = names.get(writer_version)
    if label is None:
        raise ValueError('Unsupported writer version')
    return label
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
cdef shared_ptr[WriteOptions] _create_write_options(
    file_version=None,
    batch_size=None,
    stripe_size=None,
    compression=None,
    compression_block_size=None,
    compression_strategy=None,
    row_index_stride=None,
    padding_tolerance=None,
    dictionary_key_size_threshold=None,
    bloom_filter_columns=None,
    bloom_filter_fpp=None
) except *:
    """
    Build the ORC WriteOptions from optional Python-level settings.

    Every argument left as None keeps the default of a freshly
    constructed WriteOptions. Each given argument is validated before
    it is stored; an invalid value raises ValueError, and a non-string
    ``compression`` or ``compression_strategy`` raises TypeError.
    """
    cdef:
        shared_ptr[WriteOptions] options
    options = make_shared[WriteOptions]()
    # batch_size
    if batch_size is not None:
        if isinstance(batch_size, int) and batch_size > 0:
            deref(options).batch_size = batch_size
        else:
            raise ValueError(f"Invalid ORC writer batch size: {batch_size}")
    # file_version
    if file_version is not None:
        if file_version == "0.12":
            deref(options).file_version = FileVersion(0, 12)
        elif file_version == "0.11":
            deref(options).file_version = FileVersion(0, 11)
        else:
            raise ValueError(f"Unsupported ORC file version: {file_version}")
    # stripe_size
    if stripe_size is not None:
        if isinstance(stripe_size, int) and stripe_size > 0:
            deref(options).stripe_size = stripe_size
        else:
            raise ValueError(f"Invalid ORC stripe size: {stripe_size}")
    # compression
    if compression is not None:
        if isinstance(compression, str):
            deref(options).compression = compression_type_from_name(
                compression)
        else:
            raise TypeError("Unsupported ORC compression type: "
                            f"{compression}")
    # compression_block_size
    if compression_block_size is not None:
        if (isinstance(compression_block_size, int) and
                compression_block_size > 0):
            deref(options).compression_block_size = compression_block_size
        else:
            raise ValueError("Invalid ORC compression block size: "
                             f"{compression_block_size}")
    # compression_strategy
    if compression_strategy is not None:
        # Validate the strategy itself. The type of ``compression`` used to
        # be checked here, which rejected a valid strategy whenever
        # ``compression`` was not given.
        if isinstance(compression_strategy, str):
            deref(options).compression_strategy = \
                compression_strategy_from_name(compression_strategy)
        else:
            raise TypeError("Unsupported ORC compression strategy: "
                            f"{compression_strategy}")
    # row_index_stride
    if row_index_stride is not None:
        if isinstance(row_index_stride, int) and row_index_stride > 0:
            deref(options).row_index_stride = row_index_stride
        else:
            raise ValueError("Invalid ORC row index stride: "
                             f"{row_index_stride}")
    # padding_tolerance
    if padding_tolerance is not None:
        try:
            padding_tolerance = float(padding_tolerance)
            deref(options).padding_tolerance = padding_tolerance
        except Exception:
            raise ValueError("Invalid ORC padding tolerance: "
                             f"{padding_tolerance}")
    # dictionary_key_size_threshold
    if dictionary_key_size_threshold is not None:
        try:
            dictionary_key_size_threshold = float(
                dictionary_key_size_threshold)
            # Explicit range check instead of ``assert`` so the validation
            # still runs when assertions are disabled.
            if not 0 <= dictionary_key_size_threshold <= 1:
                raise ValueError("out of range")
            deref(options).dictionary_key_size_threshold = \
                dictionary_key_size_threshold
        except Exception:
            raise ValueError("Invalid ORC dictionary key size threshold: "
                             f"{dictionary_key_size_threshold}")
    # bloom_filter_columns
    if bloom_filter_columns is not None:
        try:
            bloom_filter_columns = list(bloom_filter_columns)
            for col in bloom_filter_columns:
                # Column indices must be non-negative integers; any failure
                # is re-raised below with the full message.
                if not isinstance(col, int) or col < 0:
                    raise ValueError(col)
            deref(options).bloom_filter_columns = bloom_filter_columns
        except Exception:
            raise ValueError("Invalid ORC BloomFilter columns: "
                             f"{bloom_filter_columns}")
    # Max false positive rate of the Bloom Filter
    if bloom_filter_fpp is not None:
        try:
            bloom_filter_fpp = float(bloom_filter_fpp)
            # Explicit range check instead of ``assert`` (see above).
            if not 0 <= bloom_filter_fpp <= 1:
                raise ValueError("out of range")
            deref(options).bloom_filter_fpp = bloom_filter_fpp
        except Exception:
            raise ValueError("Invalid ORC BloomFilter false positive rate: "
                             f"{bloom_filter_fpp}")
    return options
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
cdef class ORCReader(_Weakrefable):
    """
    Reader for a single ORC file, backed by a C++ ``ORCFileReader``.

    ``self.reader`` is only assigned in :meth:`open`; every other method
    dereferences it, so ``open`` has to be called first.
    """
    cdef:
        # Object handed to ``open``; stored on the instance.
        # NOTE(review): presumably kept so the source outlives the reader
        # -- confirm.
        object source
        # Pointer produced by ``maybe_unbox_memory_pool`` in ``__cinit__``.
        CMemoryPool* allocator
        # Underlying C++ reader, assigned by ``open``.
        unique_ptr[ORCFileReader] reader

    def __cinit__(self, MemoryPool memory_pool=None):
        # Resolve the optional MemoryPool into the C pointer used by open().
        self.allocator = maybe_unbox_memory_pool(memory_pool)

    def open(self, object source, c_bool use_memory_map=True):
        """
        Open ``source`` and create the underlying ORC file reader.

        Parameters
        ----------
        source : object
            Passed to ``get_reader`` to obtain a random access file handle.
        use_memory_map : bool, default True
            Forwarded to ``get_reader``.
        """
        cdef:
            shared_ptr[CRandomAccessFile] rd_handle

        self.source = source

        # get_reader works on Python objects, so it runs with the GIL held;
        # only the C++ open call below runs with the GIL released.
        get_reader(source, use_memory_map, &rd_handle)
        with nogil:
            self.reader = move(GetResultValue(
                ORCFileReader.Open(rd_handle, self.allocator)
            ))

    def metadata(self):
        """
        The arrow metadata for this file.

        Returns
        -------
        metadata : pyarrow.KeyValueMetadata
        """
        cdef:
            shared_ptr[const CKeyValueMetadata] sp_arrow_metadata

        with nogil:
            sp_arrow_metadata = GetResultValue(
                deref(self.reader).ReadMetadata()
            )

        return pyarrow_wrap_metadata(sp_arrow_metadata)

    def schema(self):
        """
        The arrow schema for this file.

        Returns
        -------
        schema : pyarrow.Schema
        """
        cdef:
            shared_ptr[CSchema] sp_arrow_schema

        with nogil:
            sp_arrow_schema = GetResultValue(deref(self.reader).ReadSchema())

        return pyarrow_wrap_schema(sp_arrow_schema)

    def nrows(self):
        """Return the reader's ``NumberOfRows()``."""
        return deref(self.reader).NumberOfRows()

    def nstripes(self):
        """Return the reader's ``NumberOfStripes()``."""
        return deref(self.reader).NumberOfStripes()

    def file_version(self):
        """Return ``GetFileVersion()`` converted by ``file_version_from_class``."""
        return file_version_from_class(deref(self.reader).GetFileVersion())

    def software_version(self):
        """Return ``GetSoftwareVersion()`` decoded with ``frombytes``."""
        return frombytes(deref(self.reader).GetSoftwareVersion())

    def compression(self):
        """Return ``GetCompression()`` converted by ``compression_type_from_enum``."""
        return compression_type_from_enum(
            GetResultValue(deref(self.reader).GetCompression()))

    def compression_size(self):
        """Return the reader's ``GetCompressionSize()``."""
        return deref(self.reader).GetCompressionSize()

    def row_index_stride(self):
        """Return the reader's ``GetRowIndexStride()``."""
        return deref(self.reader).GetRowIndexStride()

    def writer(self):
        """
        Identify the writer of this file.

        Returns
        -------
        The name produced by ``writer_id_from_enum``, or the raw
        ``GetWriterIdValue()`` when that name is ``'UNKNOWN'``.
        """
        writer_name = writer_id_from_enum(deref(self.reader).GetWriterId())
        if writer_name == 'UNKNOWN':
            # No known name for this id: fall back to the raw id value.
            return deref(self.reader).GetWriterIdValue()
        else:
            return writer_name

    def writer_version(self):
        """Return ``GetWriterVersion()`` converted by ``writer_version_from_enum``."""
        return writer_version_from_enum(deref(self.reader).GetWriterVersion())

    def nstripe_statistics(self):
        """Return the reader's ``GetNumberOfStripeStatistics()``."""
        return deref(self.reader).GetNumberOfStripeStatistics()

    def content_length(self):
        """Return the reader's ``GetContentLength()``."""
        return deref(self.reader).GetContentLength()

    def stripe_statistics_length(self):
        """Return the reader's ``GetStripeStatisticsLength()``."""
        return deref(self.reader).GetStripeStatisticsLength()

    def file_footer_length(self):
        """Return the reader's ``GetFileFooterLength()``."""
        return deref(self.reader).GetFileFooterLength()

    def file_postscript_length(self):
        """Return the reader's ``GetFilePostscriptLength()``."""
        return deref(self.reader).GetFilePostscriptLength()

    def file_length(self):
        """Return the reader's ``GetFileLength()``."""
        return deref(self.reader).GetFileLength()

    def serialized_file_tail(self):
        """Return the reader's ``GetSerializedFileTail()``."""
        return deref(self.reader).GetSerializedFileTail()

    def read_stripe(self, n, columns=None):
        """
        Read a single stripe as a record batch.

        Parameters
        ----------
        n : int
            Stripe number; converted to ``int64_t`` before the read.
        columns : iterable of str or bytes, optional
            Names passed to ``ReadStripe`` after conversion with
            ``tobytes``. If None, ``ReadStripe`` is called with the stripe
            number only.

        Returns
        -------
        The result of ``pyarrow_wrap_batch`` on the stripe's record batch.
        """
        cdef:
            shared_ptr[CRecordBatch] sp_record_batch
            int64_t stripe
            std_vector[c_string] c_names

        # Convert to a C integer up front so it can be used without the GIL.
        stripe = n

        if columns is None:
            with nogil:
                sp_record_batch = GetResultValue(
                    deref(self.reader).ReadStripe(stripe)
                )
        else:
            # Name conversion touches Python objects, so it must happen
            # before the GIL is released.
            c_names = [tobytes(name) for name in columns]
            with nogil:
                sp_record_batch = GetResultValue(
                    deref(self.reader).ReadStripe(stripe, c_names)
                )

        return pyarrow_wrap_batch(sp_record_batch)

    def read(self, columns=None):
        """
        Read the file as a table.

        Parameters
        ----------
        columns : iterable of str or bytes, optional
            Names passed to ``Read`` after conversion with ``tobytes``.
            If None, ``Read`` is called without arguments.

        Returns
        -------
        The result of ``pyarrow_wrap_table`` on the table that was read.
        """
        cdef:
            shared_ptr[CTable] sp_table
            std_vector[c_string] c_names

        if columns is None:
            with nogil:
                sp_table = GetResultValue(deref(self.reader).Read())
        else:
            # Name conversion touches Python objects, so it must happen
            # before the GIL is released.
            c_names = [tobytes(name) for name in columns]
            with nogil:
                sp_table = GetResultValue(deref(self.reader).Read(c_names))

        return pyarrow_wrap_table(sp_table)
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
cdef class ORCWriter(_Weakrefable):
    """
    Writer for a single ORC file, backed by a C++ ``ORCFileWriter``.

    ``self.writer`` and ``self.sink`` are only assigned in :meth:`open`;
    ``write`` and ``close`` dereference them, so ``open`` has to be called
    first.
    """
    cdef:
        # Underlying C++ writer, assigned by ``open``.
        unique_ptr[ORCFileWriter] writer
        # Output stream the writer writes to.
        shared_ptr[COutputStream] sink
        # True when ``open`` created the sink itself from a path; ``close``
        # then closes the sink as well.
        c_bool own_sink

    def open(self, object where, *,
             file_version=None,
             batch_size=None,
             stripe_size=None,
             compression=None,
             compression_block_size=None,
             compression_strategy=None,
             row_index_stride=None,
             padding_tolerance=None,
             dictionary_key_size_threshold=None,
             bloom_filter_columns=None,
             bloom_filter_fpp=None):
        """
        Open the output sink and create the underlying ORC writer.

        Parameters
        ----------
        where : object
            If ``_stringify_path`` accepts it, it is used as a path and
            opened with ``FileOutputStream.Open``. If ``_stringify_path``
            raises TypeError, it is passed to ``get_writer`` instead.
        file_version, batch_size, stripe_size, compression, \
compression_block_size, compression_strategy, row_index_stride, \
padding_tolerance, dictionary_key_size_threshold, bloom_filter_columns, \
bloom_filter_fpp : optional
            Forwarded unchanged, by keyword, to ``_create_write_options``.
        """
        cdef:
            shared_ptr[WriteOptions] write_options
            c_string c_where
        try:
            where = _stringify_path(where)
        except TypeError:
            # Not path-like: hand the object to get_writer and remember that
            # this writer did not create the sink.
            get_writer(where, &self.sink)
            self.own_sink = False
        else:
            c_where = tobytes(where)
            with nogil:
                self.sink = GetResultValue(FileOutputStream.Open(c_where))
            self.own_sink = True

        write_options = _create_write_options(
            file_version=file_version,
            batch_size=batch_size,
            stripe_size=stripe_size,
            compression=compression,
            compression_block_size=compression_block_size,
            compression_strategy=compression_strategy,
            row_index_stride=row_index_stride,
            padding_tolerance=padding_tolerance,
            dictionary_key_size_threshold=dictionary_key_size_threshold,
            bloom_filter_columns=bloom_filter_columns,
            bloom_filter_fpp=bloom_filter_fpp
        )

        with nogil:
            self.writer = move(GetResultValue(
                ORCFileWriter.Open(self.sink.get(),
                                   deref(write_options))))

    def write(self, Table table):
        """
        Write ``table`` through the underlying ORC writer.

        Parameters
        ----------
        table : pyarrow.Table
            Unwrapped with ``pyarrow_unwrap_table`` and passed to ``Write``.
        """
        cdef:
            shared_ptr[CTable] sp_table
        # Unwrap while holding the GIL; only the C++ write runs without it.
        sp_table = pyarrow_unwrap_table(table)
        with nogil:
            check_status(deref(self.writer).Write(deref(sp_table)))

    def close(self):
        """
        Close the ORC writer, then the sink if this writer opened it.
        """
        with nogil:
            check_status(deref(self.writer).Close())
            # Only close the sink when open() created it from a path.
            if self.own_sink:
                check_status(deref(self.sink).Close())
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_parquet.pyx
ADDED
|
@@ -0,0 +1,2266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: profile=False
|
| 19 |
+
# distutils: language = c++
|
| 20 |
+
|
| 21 |
+
from collections.abc import Sequence
|
| 22 |
+
from textwrap import indent
|
| 23 |
+
import warnings
|
| 24 |
+
|
| 25 |
+
from cython.operator cimport dereference as deref
|
| 26 |
+
from pyarrow.includes.common cimport *
|
| 27 |
+
from pyarrow.includes.libarrow cimport *
|
| 28 |
+
from pyarrow.includes.libarrow_python cimport *
|
| 29 |
+
from pyarrow.lib cimport (_Weakrefable, Buffer, Schema,
|
| 30 |
+
check_status,
|
| 31 |
+
MemoryPool, maybe_unbox_memory_pool,
|
| 32 |
+
Table, KeyValueMetadata,
|
| 33 |
+
pyarrow_wrap_chunked_array,
|
| 34 |
+
pyarrow_wrap_schema,
|
| 35 |
+
pyarrow_unwrap_metadata,
|
| 36 |
+
pyarrow_unwrap_schema,
|
| 37 |
+
pyarrow_wrap_table,
|
| 38 |
+
pyarrow_wrap_batch,
|
| 39 |
+
pyarrow_wrap_scalar,
|
| 40 |
+
NativeFile, get_reader, get_writer,
|
| 41 |
+
string_to_timeunit)
|
| 42 |
+
|
| 43 |
+
from pyarrow.lib import (ArrowException, NativeFile, BufferOutputStream,
|
| 44 |
+
_stringify_path,
|
| 45 |
+
tobytes, frombytes, is_threading_enabled)
|
| 46 |
+
|
| 47 |
+
cimport cpython as cp
|
| 48 |
+
|
| 49 |
+
# Row group size limits: 1024*1024 = 1,048,576 and 64*1024*1024 = 67,108,864.
# NOTE(review): presumably counted in rows and used as default / upper bound
# by the writer code later in this module -- confirm at the use sites.
_DEFAULT_ROW_GROUP_SIZE = 1024*1024
_MAX_ROW_GROUP_SIZE = 64*1024*1024
|
| 51 |
+
|
| 52 |
+
cdef class Statistics(_Weakrefable):
    """Statistics for a single column in a single row group."""

    def __cinit__(self):
        pass

    def __repr__(self):
        return """{}
has_min_max: {}
min: {}
max: {}
null_count: {}
distinct_count: {}
num_values: {}
physical_type: {}
logical_type: {}
converted_type (legacy): {}""".format(object.__repr__(self),
                                      self.has_min_max,
                                      self.min,
                                      self.max,
                                      self.null_count,
                                      self.distinct_count,
                                      self.num_values,
                                      self.physical_type,
                                      str(self.logical_type),
                                      self.converted_type)

    def to_dict(self):
        """
        Get dictionary representation of statistics.

        Returns
        -------
        dict
            Dictionary with the keys ``has_min_max``, ``min``, ``max``,
            ``null_count``, ``distinct_count``, ``num_values`` and
            ``physical_type``.
        """
        d = dict(
            has_min_max=self.has_min_max,
            min=self.min,
            max=self.max,
            null_count=self.null_count,
            distinct_count=self.distinct_count,
            num_values=self.num_values,
            physical_type=self.physical_type
        )
        return d

    def __eq__(self, other):
        # equals() raises TypeError for anything that is not a Statistics
        # instance (including None); report that as "not comparable" so
        # Python falls back to its default comparison.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, Statistics other not None):
        """
        Return whether the two column statistics objects are equal.

        Parameters
        ----------
        other : Statistics
            Statistics to compare against.

        Returns
        -------
        are_equal : bool

        Raises
        ------
        TypeError
            If ``other`` is None or not a Statistics instance.
        """
        # ``not None`` is required: without it Cython lets None through the
        # argument type check and the C attribute access below would read
        # from an object that is not a Statistics instance.
        return self.statistics.get().Equals(deref(other.statistics.get()))

    @property
    def has_min_max(self):
        """Whether min and max are present (bool)."""
        return self.statistics.get().HasMinMax()

    @property
    def has_null_count(self):
        """Whether null count is present (bool)."""
        return self.statistics.get().HasNullCount()

    @property
    def has_distinct_count(self):
        """Whether distinct count is present (bool)."""
        return self.statistics.get().HasDistinctCount()

    @property
    def min_raw(self):
        """
        Min value as physical type (bool, int, float, or bytes).

        None when min and max are not present.
        """
        if self.has_min_max:
            return _cast_statistic_raw_min(self.statistics.get())
        else:
            return None

    @property
    def max_raw(self):
        """
        Max value as physical type (bool, int, float, or bytes).

        None when min and max are not present.
        """
        if self.has_min_max:
            return _cast_statistic_raw_max(self.statistics.get())
        else:
            return None

    @property
    def min(self):
        """
        Min value as logical type.

        Returned as the Python equivalent of logical type, such as datetime.date
        for dates and decimal.Decimal for decimals. None when min and max are
        not present.
        """
        if self.has_min_max:
            min_scalar, _ = _cast_statistics(self.statistics.get())
            return min_scalar.as_py()
        else:
            return None

    @property
    def max(self):
        """
        Max value as logical type.

        Returned as the Python equivalent of logical type, such as datetime.date
        for dates and decimal.Decimal for decimals. None when min and max are
        not present.
        """
        if self.has_min_max:
            _, max_scalar = _cast_statistics(self.statistics.get())
            return max_scalar.as_py()
        else:
            return None

    @property
    def null_count(self):
        """Number of null values in chunk (int), or None if not present."""
        if self.has_null_count:
            return self.statistics.get().null_count()
        else:
            return None

    @property
    def distinct_count(self):
        """Distinct number of values in chunk (int), or None if not present."""
        if self.has_distinct_count:
            return self.statistics.get().distinct_count()
        else:
            return None

    @property
    def num_values(self):
        """Number of non-null values (int)."""
        return self.statistics.get().num_values()

    @property
    def physical_type(self):
        """Physical type of column (str)."""
        raw_physical_type = self.statistics.get().physical_type()
        return physical_type_name_from_enum(raw_physical_type)

    @property
    def logical_type(self):
        """Logical type of column (:class:`ParquetLogicalType`)."""
        return wrap_logical_type(self.statistics.get().descr().logical_type())

    @property
    def converted_type(self):
        """Legacy converted type (str or None)."""
        raw_converted_type = self.statistics.get().descr().converted_type()
        return converted_type_name_from_enum(raw_converted_type)
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
cdef class ParquetLogicalType(_Weakrefable):
    """Logical type of parquet type."""
    cdef:
        # Shared handle to the wrapped C++ logical type, set by ``init``.
        shared_ptr[const CParquetLogicalType] type

    def __cinit__(self):
        pass

    cdef init(self, const shared_ptr[const CParquetLogicalType]& type):
        # Attach the C++ logical type this Python object exposes.
        self.type = type

    def __repr__(self):
        header = object.__repr__(self)
        description = str(self)
        return "{}\n {}".format(header, description)

    def __str__(self):
        cdef c_string text = self.type.get().ToString()
        return frombytes(text, safe=True)

    def to_json(self):
        """
        Serialize the type and its parameters as JSON.

        Returns
        -------
        json : str
            JSON representation of type, with at least a field called 'Type'
            which contains the type name. If the type is parameterized, such
            as a decimal with scale and precision, will contain those as fields
            as well.
        """
        cdef c_string payload = self.type.get().ToJSON()
        return frombytes(payload)

    @property
    def type(self):
        """Name of the logical type (str)."""
        type_enum = self.type.get().type()
        return logical_type_name_from_enum(type_enum)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
cdef wrap_logical_type(const shared_ptr[const CParquetLogicalType]& type):
    """Build a ParquetLogicalType object around the given C++ logical type."""
    cdef ParquetLogicalType wrapped
    wrapped = ParquetLogicalType()
    wrapped.init(type)
    return wrapped
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
cdef _cast_statistic_raw_min(CStatistics* statistics):
    """
    Return the minimum of ``statistics`` as a value of its physical type.

    The pointer is cast to the typed statistics class that matches the
    physical type. Byte-array minima are boxed as ``bytes``. A physical
    type not listed below yields None.
    """
    cdef:
        ParquetType kind = statistics.physical_type()
        # Only used for FIXED_LEN_BYTE_ARRAY, but read up front.
        uint32_t flba_width = statistics.descr().type_length()

    if kind == ParquetType_BOOLEAN:
        return (<CBoolStatistics*> statistics).min()
    if kind == ParquetType_INT32:
        return (<CInt32Statistics*> statistics).min()
    if kind == ParquetType_INT64:
        return (<CInt64Statistics*> statistics).min()
    if kind == ParquetType_FLOAT:
        return (<CFloatStatistics*> statistics).min()
    if kind == ParquetType_DOUBLE:
        return (<CDoubleStatistics*> statistics).min()
    if kind == ParquetType_BYTE_ARRAY:
        return _box_byte_array((<CByteArrayStatistics*> statistics).min())
    if kind == ParquetType_FIXED_LEN_BYTE_ARRAY:
        return _box_flba((<CFLBAStatistics*> statistics).min(), flba_width)
    return None
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
cdef _cast_statistic_raw_max(CStatistics* statistics):
    """
    Return the maximum of ``statistics`` as a value of its physical type.

    The pointer is cast to the typed statistics class that matches the
    physical type. Byte-array maxima are boxed as ``bytes``. A physical
    type not listed below yields None.
    """
    cdef:
        ParquetType kind = statistics.physical_type()
        # Only used for FIXED_LEN_BYTE_ARRAY, but read up front.
        uint32_t flba_width = statistics.descr().type_length()

    if kind == ParquetType_BOOLEAN:
        return (<CBoolStatistics*> statistics).max()
    if kind == ParquetType_INT32:
        return (<CInt32Statistics*> statistics).max()
    if kind == ParquetType_INT64:
        return (<CInt64Statistics*> statistics).max()
    if kind == ParquetType_FLOAT:
        return (<CFloatStatistics*> statistics).max()
    if kind == ParquetType_DOUBLE:
        return (<CDoubleStatistics*> statistics).max()
    if kind == ParquetType_BYTE_ARRAY:
        return _box_byte_array((<CByteArrayStatistics*> statistics).max())
    if kind == ParquetType_FIXED_LEN_BYTE_ARRAY:
        return _box_flba((<CFLBAStatistics*> statistics).max(), flba_width)
    return None
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
cdef _cast_statistics(CStatistics* statistics):
    """
    Convert the min and max of ``statistics`` to wrapped scalars.

    Returns a ``(min, max)`` tuple of the objects produced by
    ``pyarrow_wrap_scalar``. A failed conversion is raised through
    ``check_status``.
    """
    cdef:
        shared_ptr[CScalar] sp_lower
        shared_ptr[CScalar] sp_upper

    check_status(
        StatisticsAsScalars(deref(statistics), &sp_lower, &sp_upper)
    )
    lower = pyarrow_wrap_scalar(sp_lower)
    upper = pyarrow_wrap_scalar(sp_upper)
    return (lower, upper)
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
cdef _box_byte_array(ParquetByteArray val):
    """Box the ``val.len`` bytes starting at ``val.ptr`` as a Python bytes."""
    cdef:
        char* start = <char*> val.ptr
        Py_ssize_t size = <Py_ssize_t> val.len
    return cp.PyBytes_FromStringAndSize(start, size)
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
cdef _box_flba(ParquetFLBA val, uint32_t len):
    """Box the ``len`` bytes starting at ``val.ptr`` as a Python bytes."""
    cdef:
        char* start = <char*> val.ptr
        Py_ssize_t size = <Py_ssize_t> len
    return cp.PyBytes_FromStringAndSize(start, size)
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
cdef class ColumnChunkMetaData(_Weakrefable):
    """Column metadata for a single row group."""

    def __cinit__(self):
        pass

    def __repr__(self):
        # The nested statistics repr is shifted right by four spaces so it
        # reads as a sub-block under the "statistics:" label.
        statistics = indent(repr(self.statistics), 4 * ' ')
        # NOTE(review): leading whitespace inside this literal could not be
        # recovered from the extracted text -- confirm against the original.
        return """{0}
file_offset: {1}
file_path: {2}
physical_type: {3}
num_values: {4}
path_in_schema: {5}
is_stats_set: {6}
statistics:
{7}
compression: {8}
encodings: {9}
has_dictionary_page: {10}
dictionary_page_offset: {11}
data_page_offset: {12}
total_compressed_size: {13}
total_uncompressed_size: {14}""".format(object.__repr__(self),
                                          self.file_offset,
                                          self.file_path,
                                          self.physical_type,
                                          self.num_values,
                                          self.path_in_schema,
                                          self.is_stats_set,
                                          statistics,
                                          self.compression,
                                          self.encodings,
                                          self.has_dictionary_page,
                                          self.dictionary_page_offset,
                                          self.data_page_offset,
                                          self.total_compressed_size,
                                          self.total_uncompressed_size)

    def to_dict(self):
        """
        Get dictionary representation of the column chunk metadata.

        Returns
        -------
        dict
            Dictionary with a key for each attribute of this class. The
            ``statistics`` entry is itself a dictionary, or None when no
            statistics are set.
        """
        statistics = self.statistics.to_dict() if self.is_stats_set else None
        d = dict(
            file_offset=self.file_offset,
            file_path=self.file_path,
            physical_type=self.physical_type,
            num_values=self.num_values,
            path_in_schema=self.path_in_schema,
            is_stats_set=self.is_stats_set,
            statistics=statistics,
            compression=self.compression,
            encodings=self.encodings,
            has_dictionary_page=self.has_dictionary_page,
            dictionary_page_offset=self.dictionary_page_offset,
            data_page_offset=self.data_page_offset,
            total_compressed_size=self.total_compressed_size,
            total_uncompressed_size=self.total_uncompressed_size
        )
        return d

    def __eq__(self, other):
        # equals() rejects arguments of the wrong type (and None) with
        # TypeError; report that as NotImplemented so Python falls back to
        # its default comparison instead of propagating the error.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, ColumnChunkMetaData other not None):
        """
        Return whether the two column chunk metadata objects are equal.

        Parameters
        ----------
        other : ColumnChunkMetaData
            Metadata to compare against. None is rejected: the comparison
            dereferences the underlying C++ object of ``other``.

        Returns
        -------
        are_equal : bool

        Raises
        ------
        TypeError
            If ``other`` is None or not a ColumnChunkMetaData.
        """
        return self.metadata.Equals(deref(other.metadata))

    @property
    def file_offset(self):
        """Offset into file where column chunk is located (int)."""
        return self.metadata.file_offset()

    @property
    def file_path(self):
        """Optional file path if set (str or None)."""
        return frombytes(self.metadata.file_path())

    @property
    def physical_type(self):
        """Physical type of column (str)."""
        return physical_type_name_from_enum(self.metadata.type())

    @property
    def num_values(self):
        """Total number of values (int)."""
        return self.metadata.num_values()

    @property
    def path_in_schema(self):
        """Nested path to field, separated by periods (str)."""
        path = self.metadata.path_in_schema().get().ToDotString()
        return frombytes(path)

    @property
    def is_stats_set(self):
        """Whether or not statistics are present in metadata (bool)."""
        return self.metadata.is_stats_set()

    @property
    def statistics(self):
        """
        Statistics for column chunk (:class:`Statistics`).

        None when no statistics are set in the metadata.
        """
        if not self.metadata.is_stats_set():
            return None
        statistics = Statistics()
        statistics.init(self.metadata.statistics(), self)
        return statistics

    @property
    def compression(self):
        """
        Type of compression used for column (str).

        One of 'UNCOMPRESSED', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD',
        or 'UNKNOWN'.
        """
        return compression_name_from_enum(self.metadata.compression())

    @property
    def encodings(self):
        """
        Encodings used for column (tuple of str).

        One of 'PLAIN', 'BIT_PACKED', 'RLE', 'BYTE_STREAM_SPLIT', 'DELTA_BINARY_PACKED',
        'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'.
        """
        return tuple(map(encoding_name_from_enum, self.metadata.encodings()))

    @property
    def has_dictionary_page(self):
        """Whether there is dictionary data present in the column chunk (bool)."""
        return bool(self.metadata.has_dictionary_page())

    @property
    def dictionary_page_offset(self):
        """
        Offset of dictionary page relative to beginning of the file (int).

        None when the column chunk has no dictionary page.
        """
        if self.has_dictionary_page:
            return self.metadata.dictionary_page_offset()
        else:
            return None

    @property
    def data_page_offset(self):
        """Offset of data page relative to beginning of the file (int)."""
        return self.metadata.data_page_offset()

    @property
    def has_index_page(self):
        """Not yet supported."""
        raise NotImplementedError('not supported in parquet-cpp')

    @property
    def index_page_offset(self):
        """Not yet supported."""
        raise NotImplementedError("parquet-cpp doesn't return valid values")

    @property
    def total_compressed_size(self):
        """Compressed size in bytes (int)."""
        return self.metadata.total_compressed_size()

    @property
    def total_uncompressed_size(self):
        """Uncompressed size in bytes (int)."""
        return self.metadata.total_uncompressed_size()

    @property
    def has_offset_index(self):
        """Whether the column chunk has an offset index"""
        return self.metadata.GetOffsetIndexLocation().has_value()

    @property
    def has_column_index(self):
        """Whether the column chunk has a column index"""
        return self.metadata.GetColumnIndexLocation().has_value()

    @property
    def metadata(self):
        """
        Additional metadata as key value pairs (dict[bytes, bytes]).

        None when the column chunk carries no key/value metadata.
        """
        cdef:
            unordered_map[c_string, c_string] metadata
            const CKeyValueMetadata* underlying_metadata
        underlying_metadata = self.metadata.key_value_metadata().get()
        if underlying_metadata != NULL:
            underlying_metadata.ToUnorderedMap(&metadata)
            return metadata
        else:
            return None
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
cdef class SortingColumn:
    """
    Sorting specification for a single column.

    Returned by :meth:`RowGroupMetaData.sorting_columns` and used in
    :class:`ParquetWriter` to specify the sort order of the data.

    Parameters
    ----------
    column_index : int
        Index of column that data is sorted by.
    descending : bool, default False
        Whether column is sorted in descending order.
    nulls_first : bool, default False
        Whether null values appear before valid values.

    Notes
    -----

    Column indices are zero-based, refer only to leaf fields, and are in
    depth-first order. This may make the column indices for nested schemas
    different from what you expect. In most cases, it will be easier to
    specify the sort order using column names instead of column indices
    and converting using the ``from_ordering`` method.

    Examples
    --------

    In other APIs, sort order is specified by names, such as:

    >>> sort_order = [('id', 'ascending'), ('timestamp', 'descending')]

    For Parquet, the column index must be used instead:

    >>> import pyarrow.parquet as pq
    >>> [pq.SortingColumn(0), pq.SortingColumn(1, descending=True)]
    [SortingColumn(column_index=0, descending=False, nulls_first=False), SortingColumn(column_index=1, descending=True, nulls_first=False)]

    Convert the sort_order into the list of sorting columns with
    ``from_ordering`` (note that the schema must be provided as well):

    >>> import pyarrow as pa
    >>> schema = pa.schema([('id', pa.int64()), ('timestamp', pa.timestamp('ms'))])
    >>> sorting_columns = pq.SortingColumn.from_ordering(schema, sort_order)
    >>> sorting_columns
    (SortingColumn(column_index=0, descending=False, nulls_first=False), SortingColumn(column_index=1, descending=True, nulls_first=False))

    Convert back to the sort order with ``to_ordering``:

    >>> pq.SortingColumn.to_ordering(schema, sorting_columns)
    ((('id', 'ascending'), ('timestamp', 'descending')), 'at_end')

    See Also
    --------
    RowGroupMetaData.sorting_columns
    """
    cdef int column_index
    cdef c_bool descending
    cdef c_bool nulls_first

    def __init__(self, int column_index, c_bool descending=False, c_bool nulls_first=False):
        self.column_index = column_index
        self.descending = descending
        self.nulls_first = nulls_first

    @classmethod
    def from_ordering(cls, Schema schema, sort_keys, null_placement='at_end'):
        """
        Create a tuple of SortingColumn objects from the same arguments as
        :class:`pyarrow.compute.SortOptions`.

        Parameters
        ----------
        schema : Schema
            Schema of the input data.
        sort_keys : Sequence of (name, order) tuples
            Names of field/column keys (str) to sort the input on,
            along with the order each field/column is sorted in.
            Accepted values for `order` are "ascending", "descending".
            A bare name (str) is also accepted and means ascending order.
        null_placement : {'at_start', 'at_end'}, default 'at_end'
            Where null values should appear in the sort order.

        Returns
        -------
        sorting_columns : tuple of SortingColumn

        Raises
        ------
        ValueError
            If ``null_placement`` or a sort order is not one of the accepted
            values, if a sort key has the wrong shape, or if a key name is
            not found in ``schema``.
        """
        if null_placement == 'at_start':
            nulls_first = True
        elif null_placement == 'at_end':
            nulls_first = False
        else:
            raise ValueError('null_placement must be "at_start" or "at_end"')

        col_map = _name_to_index_map(schema)

        sorting_columns = []

        for sort_key in sort_keys:
            if isinstance(sort_key, str):
                # A bare column name sorts ascending.
                name = sort_key
                descending = False
            elif (isinstance(sort_key, tuple) and len(sort_key) == 2 and
                    isinstance(sort_key[0], str) and
                    isinstance(sort_key[1], str)):
                name, order = sort_key
                if order == "descending":
                    descending = True
                elif order == "ascending":
                    descending = False
                else:
                    raise ValueError("Invalid sort key direction: {0}"
                                     .format(order))
            else:
                raise ValueError("Invalid sort key: {0}".format(sort_key))

            try:
                column_index = col_map[name]
            except KeyError:
                raise ValueError("Sort key name '{0}' not found in schema:\n{1}"
                                 .format(name, schema))

            sorting_columns.append(
                cls(column_index, descending=descending, nulls_first=nulls_first)
            )

        return tuple(sorting_columns)

    @staticmethod
    def to_ordering(Schema schema, sorting_columns):
        """
        Convert a tuple of SortingColumn objects to the same format as
        :class:`pyarrow.compute.SortOptions`.

        Parameters
        ----------
        schema : Schema
            Schema of the input data.
        sorting_columns : tuple of SortingColumn
            Columns to sort the input on.

        Returns
        -------
        sort_keys : tuple of (name, order) tuples
        null_placement : {'at_start', 'at_end'}

        Raises
        ------
        ValueError
            If the sorting columns do not all share the same ``nulls_first``
            value, since a single null placement cannot describe them.
        """
        # Invert the name -> index mapping so names can be looked up by index.
        col_map = {i: name for name, i in _name_to_index_map(schema).items()}

        sort_keys = []
        nulls_first = None

        for sorting_column in sorting_columns:
            name = col_map[sorting_column.column_index]
            if sorting_column.descending:
                order = "descending"
            else:
                order = "ascending"
            sort_keys.append((name, order))
            # The first column fixes the null placement; the rest must agree.
            if nulls_first is None:
                nulls_first = sorting_column.nulls_first
            elif nulls_first != sorting_column.nulls_first:
                raise ValueError("Sorting columns have inconsistent null placement")

        # With no sorting columns nulls_first stays None, giving "at_end".
        if nulls_first:
            null_placement = "at_start"
        else:
            null_placement = "at_end"

        return tuple(sort_keys), null_placement

    def __repr__(self):
        return """{}(column_index={}, descending={}, nulls_first={})""".format(
            self.__class__.__name__,
            self.column_index, self.descending, self.nulls_first)

    def __eq__(self, other):
        cdef SortingColumn rhs
        # Foreign types (and None) are not comparable: let Python fall back
        # to its default comparison rather than raising or reading C fields
        # from an object that is not a SortingColumn.
        if not isinstance(other, SortingColumn):
            return NotImplemented
        rhs = <SortingColumn> other
        return (self.column_index == rhs.column_index and
                self.descending == rhs.descending and
                self.nulls_first == rhs.nulls_first)

    def __hash__(self):
        # Hashes the same three fields that __eq__ compares.
        return hash((self.column_index, self.descending, self.nulls_first))

    @property
    def column_index(self):
        """Index of column data is sorted by (int)."""
        return self.column_index

    @property
    def descending(self):
        """Whether column is sorted in descending order (bool)."""
        return self.descending

    @property
    def nulls_first(self):
        """Whether null values appear before valid values (bool)."""
        return self.nulls_first

    def to_dict(self):
        """
        Get dictionary representation of the SortingColumn.

        Returns
        -------
        dict
            Dictionary with a key for each attribute of this class.
        """
        d = dict(
            column_index=self.column_index,
            descending=self.descending,
            nulls_first=self.nulls_first
        )
        return d
|
| 737 |
+
|
| 738 |
+
|
| 739 |
+
cdef class RowGroupMetaData(_Weakrefable):
    """Metadata for a single row group."""

    def __cinit__(self, FileMetaData parent, int index):
        if index < 0 or index >= parent.num_row_groups:
            raise IndexError('{0} out of bounds'.format(index))
        self.up_metadata = parent._metadata.RowGroup(index)
        self.metadata = self.up_metadata.get()
        # The parent and index are kept so __reduce__ can rebuild this object.
        self.parent = parent
        self.index = index

    def __reduce__(self):
        return RowGroupMetaData, (self.parent, self.index)

    def __eq__(self, other):
        # equals() rejects arguments of the wrong type (and None) with
        # TypeError; report that as NotImplemented so Python falls back to
        # its default comparison instead of propagating the error.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, RowGroupMetaData other not None):
        """
        Return whether the two row group metadata objects are equal.

        Parameters
        ----------
        other : RowGroupMetaData
            Metadata to compare against. None is rejected: the comparison
            dereferences the underlying C++ object of ``other``.

        Returns
        -------
        are_equal : bool

        Raises
        ------
        TypeError
            If ``other`` is None or not a RowGroupMetaData.
        """
        return self.metadata.Equals(deref(other.metadata))

    def column(self, int i):
        """
        Get column metadata at given index.

        Parameters
        ----------
        i : int
            Index of column to get metadata for.

        Returns
        -------
        ColumnChunkMetaData
            Metadata for column within this chunk.

        Raises
        ------
        IndexError
            If ``i`` is negative or not less than ``num_columns``.
        """
        if i < 0 or i >= self.num_columns:
            raise IndexError('{0} out of bounds'.format(i))
        chunk = ColumnChunkMetaData()
        chunk.init(self, i)
        return chunk

    def __repr__(self):
        # NOTE(review): leading whitespace inside this literal could not be
        # recovered from the extracted text -- confirm against the original.
        return """{0}
num_columns: {1}
num_rows: {2}
total_byte_size: {3}
sorting_columns: {4}""".format(object.__repr__(self),
                                 self.num_columns,
                                 self.num_rows,
                                 self.total_byte_size,
                                 self.sorting_columns)

    def to_dict(self):
        """
        Get dictionary representation of the row group metadata.

        Returns
        -------
        dict
            Dictionary with a key for each attribute of this class. The
            ``columns`` entry holds one dictionary per column chunk and
            ``sorting_columns`` one dictionary per sorting column.
        """
        d = dict(
            num_columns=self.num_columns,
            num_rows=self.num_rows,
            total_byte_size=self.total_byte_size,
            columns=[self.column(i).to_dict()
                     for i in range(self.num_columns)],
            sorting_columns=[col.to_dict() for col in self.sorting_columns]
        )
        return d

    @property
    def num_columns(self):
        """Number of columns in this row group (int)."""
        return self.metadata.num_columns()

    @property
    def num_rows(self):
        """Number of rows in this row group (int)."""
        return self.metadata.num_rows()

    @property
    def total_byte_size(self):
        """Total byte size of all the uncompressed column data in this row group (int)."""
        return self.metadata.total_byte_size()

    @property
    def sorting_columns(self):
        """Columns the row group is sorted by (tuple of :class:`SortingColumn`)."""
        out = []
        cdef vector[CSortingColumn] sorting_columns = self.metadata.sorting_columns()
        for sorting_col in sorting_columns:
            out.append(SortingColumn(
                sorting_col.column_idx,
                sorting_col.descending,
                sorting_col.nulls_first
            ))
        return tuple(out)
|
| 853 |
+
|
| 854 |
+
|
| 855 |
+
def _reconstruct_filemetadata(Buffer serialized):
    """
    Rebuild a FileMetaData object from its serialized form.

    This is the callable that ``FileMetaData.__reduce__`` returns, so it is
    what runs when a pickled FileMetaData is loaded.
    """
    cdef:
        CBuffer *raw = serialized.buffer.get()
        # The size is narrowed to 32 bits because that is the type of the
        # length out-parameter declared for CFileMetaData_Make.
        uint32_t footer_len = <uint32_t>raw.size()
        FileMetaData restored = FileMetaData.__new__(FileMetaData)

    restored.init(CFileMetaData_Make(raw.data(), &footer_len))

    return restored
|
| 864 |
+
|
| 865 |
+
|
| 866 |
+
cdef class FileMetaData(_Weakrefable):
    """Parquet metadata for a single file."""

    def __cinit__(self):
        pass

    def __reduce__(self):
        # Pickle by writing the metadata into an in-memory buffer; loading
        # passes that buffer to _reconstruct_filemetadata.
        cdef:
            NativeFile sink = BufferOutputStream()
            COutputStream* c_sink = sink.get_output_stream().get()
        with nogil:
            self._metadata.WriteTo(c_sink)

        cdef Buffer buffer = sink.getvalue()
        return _reconstruct_filemetadata, (buffer,)

    def __hash__(self):
        return hash((self.schema,
                     self.num_rows,
                     self.num_row_groups,
                     self.format_version,
                     self.serialized_size))

    def __repr__(self):
        # NOTE(review): leading whitespace inside this literal could not be
        # recovered from the extracted text -- confirm against the original.
        return """{0}
created_by: {1}
num_columns: {2}
num_rows: {3}
num_row_groups: {4}
format_version: {5}
serialized_size: {6}""".format(object.__repr__(self),
                                 self.created_by, self.num_columns,
                                 self.num_rows, self.num_row_groups,
                                 self.format_version,
                                 self.serialized_size)

    def to_dict(self):
        """
        Get dictionary representation of the file metadata.

        Returns
        -------
        dict
            Dictionary with a key for each attribute of this class. The
            ``row_groups`` entry holds one dictionary per row group.
        """
        d = dict(
            created_by=self.created_by,
            num_columns=self.num_columns,
            num_rows=self.num_rows,
            num_row_groups=self.num_row_groups,
            row_groups=[self.row_group(i).to_dict()
                        for i in range(self.num_row_groups)],
            format_version=self.format_version,
            serialized_size=self.serialized_size
        )
        return d

    def __eq__(self, other):
        # equals() rejects arguments of the wrong type (and None) with
        # TypeError; report that as NotImplemented so Python falls back to
        # its default comparison instead of propagating the error.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, FileMetaData other not None):
        """
        Return whether the two file metadata objects are equal.

        Parameters
        ----------
        other : FileMetaData
            Metadata to compare against.

        Returns
        -------
        are_equal : bool

        Raises
        ------
        TypeError
            If ``other`` is None or not a FileMetaData.
        """
        return self._metadata.Equals(deref(other._metadata))

    @property
    def schema(self):
        """Schema of the file (:class:`ParquetSchema`)."""
        # Built on first access and cached on the instance afterwards.
        if self._schema is None:
            self._schema = ParquetSchema(self)
        return self._schema

    @property
    def serialized_size(self):
        """Size of the original thrift encoded metadata footer (int)."""
        return self._metadata.size()

    @property
    def num_columns(self):
        """Number of columns in file (int)."""
        return self._metadata.num_columns()

    @property
    def num_rows(self):
        """Total number of rows in file (int)."""
        return self._metadata.num_rows()

    @property
    def num_row_groups(self):
        """Number of row groups in file (int)."""
        return self._metadata.num_row_groups()

    @property
    def format_version(self):
        """
        Parquet format version used in file (str, such as '1.0', '2.4').

        If version is missing or unparsable, will default to assuming '2.6'.
        """
        cdef ParquetVersion version = self._metadata.version()
        if version == ParquetVersion_V1:
            return '1.0'
        elif version == ParquetVersion_V2_0:
            return 'pseudo-2.0'
        elif version == ParquetVersion_V2_4:
            return '2.4'
        elif version == ParquetVersion_V2_6:
            return '2.6'
        else:
            warnings.warn('Unrecognized file version, assuming 2.6: {}'
                          .format(version))
            return '2.6'

    @property
    def created_by(self):
        """
        String describing source of the parquet file (str).

        This typically includes library name and version number. For example, Arrow 7.0's
        writer returns 'parquet-cpp-arrow version 7.0.0'.
        """
        return frombytes(self._metadata.created_by())

    @property
    def metadata(self):
        """
        Additional metadata as key value pairs (dict[bytes, bytes]).

        None when the file carries no key/value metadata.
        """
        cdef:
            unordered_map[c_string, c_string] metadata
            const CKeyValueMetadata* underlying_metadata
        underlying_metadata = self._metadata.key_value_metadata().get()
        if underlying_metadata != NULL:
            underlying_metadata.ToUnorderedMap(&metadata)
            return metadata
        else:
            return None

    def row_group(self, int i):
        """
        Get metadata for row group at index i.

        Parameters
        ----------
        i : int
            Row group index to get.

        Returns
        -------
        row_group_metadata : RowGroupMetaData
        """
        return RowGroupMetaData(self, i)

    def set_file_path(self, path):
        """
        Set ColumnChunk file paths to the given value.

        This method modifies the ``file_path`` field of each ColumnChunk
        in the FileMetaData to be a particular value.

        Parameters
        ----------
        path : str
            The file path to set on all ColumnChunks.
        """
        cdef:
            c_string c_path = tobytes(path)
        self._metadata.set_file_path(c_path)

    def append_row_groups(self, FileMetaData other not None):
        """
        Append row groups from other FileMetaData object.

        Parameters
        ----------
        other : FileMetaData
            Other metadata to append row groups from. None is rejected: the
            underlying C++ metadata of ``other`` is dereferenced.

        Raises
        ------
        TypeError
            If ``other`` is None or not a FileMetaData.
        """
        cdef shared_ptr[CFileMetaData] c_metadata

        c_metadata = other.sp_metadata
        self._metadata.AppendRowGroups(deref(c_metadata))

    def write_metadata_file(self, where):
        """
        Write the metadata to a metadata-only Parquet file.

        Parameters
        ----------
        where : path or file-like object
            Where to write the metadata. Should be a writable path on
            the local filesystem, or a writable file-like object.
        """
        cdef:
            shared_ptr[COutputStream] sink
            c_string c_where

        # A TypeError from _stringify_path means ``where`` is not path-like,
        # so it is handed to get_writer instead; otherwise the path is opened
        # as a file output stream.
        try:
            where = _stringify_path(where)
        except TypeError:
            get_writer(where, &sink)
        else:
            c_where = tobytes(where)
            with nogil:
                sink = GetResultValue(FileOutputStream.Open(c_where))

        with nogil:
            check_status(
                WriteMetaDataFile(deref(self._metadata), sink.get()))
|
| 1088 |
+
|
| 1089 |
+
|
| 1090 |
+
cdef class ParquetSchema(_Weakrefable):
    """A Parquet schema."""

    def __cinit__(self, FileMetaData container):
        # The owning FileMetaData is stored alongside the schema pointer
        # taken from it; __reduce__ also uses it to rebuild this object.
        self.parent = container
        self.schema = container._metadata.schema()

    def __repr__(self):
        return "{0}\n{1}".format(
            object.__repr__(self),
            frombytes(self.schema.ToString(), safe=True))

    def __reduce__(self):
        return ParquetSchema, (self.parent,)

    def __len__(self):
        return self.schema.num_columns()

    def __getitem__(self, i):
        return self.column(i)

    def __hash__(self):
        return hash(self.schema.ToString())

    @property
    def names(self):
        """Name of each field (list of str)."""
        return [self[i].name for i in range(len(self))]

    def to_arrow_schema(self):
        """
        Convert Parquet schema to effective Arrow schema.

        Returns
        -------
        schema : Schema
        """
        cdef shared_ptr[CSchema] sp_arrow_schema

        with nogil:
            check_status(FromParquetSchema(
                self.schema, default_arrow_reader_properties(),
                self.parent._metadata.key_value_metadata(),
                &sp_arrow_schema))

        return pyarrow_wrap_schema(sp_arrow_schema)

    def __eq__(self, other):
        # equals() rejects arguments of the wrong type (and None) with
        # TypeError; report that as NotImplemented so Python falls back to
        # its default comparison instead of propagating the error.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, ParquetSchema other not None):
        """
        Return whether the two schemas are equal.

        Parameters
        ----------
        other : ParquetSchema
            Schema to compare against. None is rejected: the comparison
            dereferences the underlying C++ schema of ``other``.

        Returns
        -------
        are_equal : bool

        Raises
        ------
        TypeError
            If ``other`` is None or not a ParquetSchema.
        """
        return self.schema.Equals(deref(other.schema))

    def column(self, i):
        """
        Return the schema for a single column.

        Parameters
        ----------
        i : int
            Index of column in schema.

        Returns
        -------
        column_schema : ColumnSchema

        Raises
        ------
        IndexError
            If ``i`` is negative or not less than the number of columns.
        """
        if i < 0 or i >= len(self):
            raise IndexError('{0} out of bounds'.format(i))

        return ColumnSchema(self, i)
|
| 1175 |
+
|
| 1176 |
+
|
| 1177 |
+
cdef class ColumnSchema(_Weakrefable):
    """
    Schema for a single column.

    Built from a parent ParquetSchema and a column index; every accessor
    below reads from the column descriptor fetched from that parent.
    """
    cdef:
        int index
        ParquetSchema parent
        const ColumnDescriptor* descr

    def __cinit__(self, ParquetSchema schema, int index):
        # The index is stored only so that __reduce__ can rebuild the object.
        self.index = index
        self.parent = schema
        self.descr = schema.schema.Column(index)

    def __eq__(self, other):
        # equals() is typed to accept ColumnSchema only; a TypeError for any
        # other operand is reported as NotImplemented.
        try:
            are_equal = self.equals(other)
        except TypeError:
            return NotImplemented
        return are_equal

    def __reduce__(self):
        constructor_args = (self.parent, self.index)
        return ColumnSchema, constructor_args

    def equals(self, ColumnSchema other):
        """
        Compare this column schema with another one.

        Parameters
        ----------
        other : ColumnSchema
            Column schema to compare against.

        Returns
        -------
        are_equal : bool
            Result of comparing the two underlying column descriptors.
        """
        return self.descr.Equals(deref(other.descr))

    def __repr__(self):
        physical = self.physical_type
        legacy = self.converted_type
        if legacy == 'DECIMAL':
            # Decimal columns are shown with their precision and scale.
            legacy = 'DECIMAL({0}, {1})'.format(self.precision, self.scale)
        elif physical == 'FIXED_LEN_BYTE_ARRAY':
            # Non-decimal fixed-length arrays are shown with their length.
            legacy = 'FIXED_LEN_BYTE_ARRAY(length={0})'.format(self.length)

        template = """<ParquetColumnSchema>
name: {0}
path: {1}
max_definition_level: {2}
max_repetition_level: {3}
physical_type: {4}
logical_type: {5}
converted_type (legacy): {6}"""
        return template.format(self.name, self.path,
                               self.max_definition_level,
                               self.max_repetition_level,
                               physical,
                               str(self.logical_type),
                               legacy)

    @property
    def name(self):
        """Field name, decoded to str."""
        raw_name = self.descr.name()
        return frombytes(raw_name)

    @property
    def path(self):
        """Path to the (possibly nested) field as a dot-separated str."""
        dotted = self.descr.path().get().ToDotString()
        return frombytes(dotted)

    @property
    def max_definition_level(self):
        """Maximum definition level reported by the descriptor (int)."""
        return self.descr.max_definition_level()

    @property
    def max_repetition_level(self):
        """Maximum repetition level reported by the descriptor (int)."""
        return self.descr.max_repetition_level()

    @property
    def physical_type(self):
        """Physical type name (str), 'UNKNOWN' if not recognised."""
        type_enum = self.descr.physical_type()
        return physical_type_name_from_enum(type_enum)

    @property
    def logical_type(self):
        """Logical type of the column, wrapped by ``wrap_logical_type``."""
        return wrap_logical_type(self.descr.logical_type())

    @property
    def converted_type(self):
        """Legacy converted type name (str), 'UNKNOWN' if not recognised."""
        type_enum = self.descr.converted_type()
        return converted_type_name_from_enum(type_enum)

    # FIXED_LEN_BYTE_ARRAY attribute
    @property
    def length(self):
        """Type length reported by the descriptor.

        Relevant for FIXED_LEN_BYTE_ARRAY columns; the descriptor value is
        returned as-is for every other type.
        """
        return self.descr.type_length()

    # Decimal attributes
    @property
    def precision(self):
        """Type precision reported by the descriptor.

        Relevant for decimal columns; the descriptor value is returned
        as-is for every other type.
        """
        return self.descr.type_precision()

    @property
    def scale(self):
        """Type scale reported by the descriptor.

        Relevant for decimal columns; the descriptor value is returned
        as-is for every other type.
        """
        return self.descr.type_scale()
|
| 1288 |
+
|
| 1289 |
+
|
| 1290 |
+
cdef physical_type_name_from_enum(ParquetType type_):
    """Return the name of a Parquet physical type, or 'UNKNOWN'."""
    names = {
        ParquetType_BOOLEAN: 'BOOLEAN',
        ParquetType_INT32: 'INT32',
        ParquetType_INT64: 'INT64',
        ParquetType_INT96: 'INT96',
        ParquetType_FLOAT: 'FLOAT',
        ParquetType_DOUBLE: 'DOUBLE',
        ParquetType_BYTE_ARRAY: 'BYTE_ARRAY',
        ParquetType_FIXED_LEN_BYTE_ARRAY: 'FIXED_LEN_BYTE_ARRAY',
    }
    return names.get(type_, 'UNKNOWN')
|
| 1301 |
+
|
| 1302 |
+
|
| 1303 |
+
cdef logical_type_name_from_enum(ParquetLogicalTypeId type_):
    """Return the name of a Parquet logical type id, or 'UNKNOWN'."""
    names = {
        ParquetLogicalType_UNDEFINED: 'UNDEFINED',
        ParquetLogicalType_STRING: 'STRING',
        ParquetLogicalType_MAP: 'MAP',
        ParquetLogicalType_LIST: 'LIST',
        ParquetLogicalType_ENUM: 'ENUM',
        ParquetLogicalType_DECIMAL: 'DECIMAL',
        ParquetLogicalType_DATE: 'DATE',
        ParquetLogicalType_TIME: 'TIME',
        ParquetLogicalType_TIMESTAMP: 'TIMESTAMP',
        ParquetLogicalType_INT: 'INT',
        ParquetLogicalType_FLOAT16: 'FLOAT16',
        ParquetLogicalType_JSON: 'JSON',
        ParquetLogicalType_BSON: 'BSON',
        ParquetLogicalType_UUID: 'UUID',
        ParquetLogicalType_NONE: 'NONE',
    }
    return names.get(type_, 'UNKNOWN')
|
| 1321 |
+
|
| 1322 |
+
|
| 1323 |
+
cdef converted_type_name_from_enum(ParquetConvertedType type_):
    """Return the name of a legacy Parquet converted type, or 'UNKNOWN'."""
    names = {
        ParquetConvertedType_NONE: 'NONE',
        ParquetConvertedType_UTF8: 'UTF8',
        ParquetConvertedType_MAP: 'MAP',
        ParquetConvertedType_MAP_KEY_VALUE: 'MAP_KEY_VALUE',
        ParquetConvertedType_LIST: 'LIST',
        ParquetConvertedType_ENUM: 'ENUM',
        ParquetConvertedType_DECIMAL: 'DECIMAL',
        ParquetConvertedType_DATE: 'DATE',
        ParquetConvertedType_TIME_MILLIS: 'TIME_MILLIS',
        ParquetConvertedType_TIME_MICROS: 'TIME_MICROS',
        ParquetConvertedType_TIMESTAMP_MILLIS: 'TIMESTAMP_MILLIS',
        ParquetConvertedType_TIMESTAMP_MICROS: 'TIMESTAMP_MICROS',
        ParquetConvertedType_UINT_8: 'UINT_8',
        ParquetConvertedType_UINT_16: 'UINT_16',
        ParquetConvertedType_UINT_32: 'UINT_32',
        ParquetConvertedType_UINT_64: 'UINT_64',
        ParquetConvertedType_INT_8: 'INT_8',
        ParquetConvertedType_INT_16: 'INT_16',
        ParquetConvertedType_INT_32: 'INT_32',
        ParquetConvertedType_INT_64: 'INT_64',
        ParquetConvertedType_JSON: 'JSON',
        ParquetConvertedType_BSON: 'BSON',
        ParquetConvertedType_INTERVAL: 'INTERVAL',
    }
    return names.get(type_, 'UNKNOWN')
|
| 1349 |
+
|
| 1350 |
+
|
| 1351 |
+
cdef encoding_name_from_enum(ParquetEncoding encoding_):
    """Return the name of a Parquet encoding, or 'UNKNOWN'."""
    names = {
        ParquetEncoding_PLAIN: 'PLAIN',
        ParquetEncoding_PLAIN_DICTIONARY: 'PLAIN_DICTIONARY',
        ParquetEncoding_RLE: 'RLE',
        ParquetEncoding_BIT_PACKED: 'BIT_PACKED',
        ParquetEncoding_DELTA_BINARY_PACKED: 'DELTA_BINARY_PACKED',
        ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY: 'DELTA_LENGTH_BYTE_ARRAY',
        ParquetEncoding_DELTA_BYTE_ARRAY: 'DELTA_BYTE_ARRAY',
        ParquetEncoding_RLE_DICTIONARY: 'RLE_DICTIONARY',
        ParquetEncoding_BYTE_STREAM_SPLIT: 'BYTE_STREAM_SPLIT',
    }
    return names.get(encoding_, 'UNKNOWN')
|
| 1363 |
+
|
| 1364 |
+
|
| 1365 |
+
cdef encoding_enum_from_name(str encoding_name):
    """
    Translate a column encoding name into its Parquet encoding enum.

    Raises ValueError for the two dictionary encodings (they are rejected
    with an "already used by default" message) and for any name that is not
    in the table below.
    """
    # Dictionary encodings cannot be requested explicitly.
    if encoding_name in ('RLE_DICTIONARY', 'PLAIN_DICTIONARY'):
        raise ValueError(f"{encoding_name!r} is already used by default.")

    selectable = {
        'PLAIN': ParquetEncoding_PLAIN,
        'BIT_PACKED': ParquetEncoding_BIT_PACKED,
        'RLE': ParquetEncoding_RLE,
        'BYTE_STREAM_SPLIT': ParquetEncoding_BYTE_STREAM_SPLIT,
        'DELTA_BINARY_PACKED': ParquetEncoding_DELTA_BINARY_PACKED,
        'DELTA_LENGTH_BYTE_ARRAY': ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY,
        'DELTA_BYTE_ARRAY': ParquetEncoding_DELTA_BYTE_ARRAY,
    }
    encoding = selectable.get(encoding_name, None)
    if encoding is None:
        raise ValueError(f"Unsupported column encoding: {encoding_name!r}")
    return encoding
|
| 1383 |
+
|
| 1384 |
+
|
| 1385 |
+
cdef compression_name_from_enum(ParquetCompression compression_):
    """Return the name of a Parquet compression codec, or 'UNKNOWN'."""
    names = {
        ParquetCompression_UNCOMPRESSED: 'UNCOMPRESSED',
        ParquetCompression_SNAPPY: 'SNAPPY',
        ParquetCompression_GZIP: 'GZIP',
        ParquetCompression_LZO: 'LZO',
        ParquetCompression_BROTLI: 'BROTLI',
        ParquetCompression_LZ4: 'LZ4',
        ParquetCompression_ZSTD: 'ZSTD',
    }
    return names.get(compression_, 'UNKNOWN')
|
| 1395 |
+
|
| 1396 |
+
|
| 1397 |
+
cdef int check_compression_name(name) except -1:
    """
    Validate a compression codec name (case-insensitive).

    Returns 0 when the name is accepted and raises ArrowException otherwise.
    """
    supported = {'NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD'}
    if name.upper() in supported:
        return 0
    raise ArrowException("Unsupported compression: " + name)
|
| 1402 |
+
|
| 1403 |
+
|
| 1404 |
+
cdef ParquetCompression compression_from_name(name):
    """
    Map a codec name (case-insensitive) to its ParquetCompression value.

    Any name that is not one of the codecs tested below yields
    ParquetCompression_UNCOMPRESSED.
    """
    codec = name.upper()
    if codec == 'SNAPPY':
        return ParquetCompression_SNAPPY
    if codec == 'GZIP':
        return ParquetCompression_GZIP
    if codec == 'LZO':
        return ParquetCompression_LZO
    if codec == 'BROTLI':
        return ParquetCompression_BROTLI
    if codec == 'LZ4':
        return ParquetCompression_LZ4
    if codec == 'ZSTD':
        return ParquetCompression_ZSTD
    return ParquetCompression_UNCOMPRESSED
|
| 1420 |
+
|
| 1421 |
+
|
| 1422 |
+
cdef class ParquetReader(_Weakrefable):
    """
    Reader for a single Parquet file.

    The underlying file reader is only created by :meth:`open`; the reading
    methods and properties below dereference it directly.
    """
    cdef:
        object source
        CMemoryPool* pool
        UniquePtrNoGIL[FileReader] reader
        FileMetaData _metadata
        shared_ptr[CRandomAccessFile] rd_handle

    cdef public:
        _column_idx_map

    def __cinit__(self, MemoryPool memory_pool=None):
        self.pool = maybe_unbox_memory_pool(memory_pool)
        self._metadata = None

    def open(self, object source not None, *, bint use_memory_map=False,
             read_dictionary=None, FileMetaData metadata=None,
             int buffer_size=0, bint pre_buffer=False,
             coerce_int96_timestamp_unit=None,
             FileDecryptionProperties decryption_properties=None,
             thrift_string_size_limit=None,
             thrift_container_size_limit=None,
             page_checksum_verification=False):
        """
        Open a parquet file for reading.

        Parameters
        ----------
        source : str, pathlib.Path, pyarrow.NativeFile, or file-like object
        use_memory_map : bool, default False
            Forwarded to ``get_reader`` when opening ``source``.
        read_dictionary : iterable[int or str], optional
            Columns, given by index or by dotted name, for which dictionary
            reading is enabled.
        metadata : FileMetaData, optional
            If given, its metadata is handed to the reader builder.
        buffer_size : int, default 0
            A positive value enables the buffered stream with that size and
            zero disables it.
        pre_buffer : bool, default False
            Ignored (treated as False) when threading is not enabled.
        coerce_int96_timestamp_unit : str, optional
            If None, the default reader property is kept.
        decryption_properties : FileDecryptionProperties, optional
        thrift_string_size_limit : int, optional
            Must be larger than zero when given.
        thrift_container_size_limit : int, optional
            Must be larger than zero when given.
        page_checksum_verification : bool, default False

        Raises
        ------
        ValueError
            If ``buffer_size`` is negative, or if one of the thrift limits
            is given and is not larger than zero.
        """
        cdef:
            shared_ptr[CFileMetaData] c_metadata
            CReaderProperties properties = default_reader_properties()
            ArrowReaderProperties arrow_props = (
                default_arrow_reader_properties())
            FileReaderBuilder builder

        # Pre-buffering is switched off when threading is unavailable.
        if pre_buffer and not is_threading_enabled():
            pre_buffer = False

        if metadata is not None:
            c_metadata = metadata.sp_metadata

        if buffer_size > 0:
            properties.enable_buffered_stream()
            properties.set_buffer_size(buffer_size)
        elif buffer_size == 0:
            properties.disable_buffered_stream()
        else:
            raise ValueError('Buffer size must be larger than zero')

        if thrift_string_size_limit is not None:
            if thrift_string_size_limit <= 0:
                raise ValueError("thrift_string_size_limit "
                                 "must be larger than zero")
            properties.set_thrift_string_size_limit(thrift_string_size_limit)
        if thrift_container_size_limit is not None:
            if thrift_container_size_limit <= 0:
                raise ValueError("thrift_container_size_limit "
                                 "must be larger than zero")
            properties.set_thrift_container_size_limit(
                thrift_container_size_limit)

        if decryption_properties is not None:
            properties.file_decryption_properties(
                decryption_properties.unwrap())

        arrow_props.set_pre_buffer(pre_buffer)

        properties.set_page_checksum_verification(page_checksum_verification)

        if coerce_int96_timestamp_unit is None:
            # use the default defined in default_arrow_reader_properties()
            pass
        else:
            arrow_props.set_coerce_int96_timestamp_unit(
                string_to_timeunit(coerce_int96_timestamp_unit))

        self.source = source
        get_reader(source, use_memory_map, &self.rd_handle)

        with nogil:
            check_status(builder.Open(self.rd_handle, properties, c_metadata))

        # Set up metadata
        with nogil:
            c_metadata = builder.raw_reader().metadata()
        self._metadata = result = FileMetaData()
        result.init(c_metadata)

        # Column names can only be resolved once the metadata is in place.
        if read_dictionary is not None:
            self._set_read_dictionary(read_dictionary, &arrow_props)

        with nogil:
            check_status(builder.memory_pool(self.pool)
                         .properties(arrow_props)
                         .Build(&self.reader))

    cdef _set_read_dictionary(self, read_dictionary,
                              ArrowReaderProperties* props):
        # Enable dictionary reading for each requested column; entries that
        # are not ints are resolved to an index through column_name_idx().
        for column in read_dictionary:
            if not isinstance(column, int):
                column = self.column_name_idx(column)
            props.set_read_dictionary(column, True)

    @property
    def column_paths(self):
        """List with, for every column, the list of its path components."""
        cdef:
            FileMetaData container = self.metadata
            const CFileMetaData* metadata = container._metadata
            vector[c_string] path
            int i = 0

        paths = []
        for i in range(0, metadata.num_columns()):
            path = (metadata.schema().Column(i)
                    .path().get().ToDotVector())
            paths.append([frombytes(x) for x in path])

        return paths

    @property
    def metadata(self):
        """FileMetaData set by :meth:`open` (None before that)."""
        return self._metadata

    @property
    def schema_arrow(self):
        """Schema of the file as reported by the underlying reader."""
        cdef shared_ptr[CSchema] out
        with nogil:
            check_status(self.reader.get().GetSchema(&out))
        return pyarrow_wrap_schema(out)

    @property
    def num_row_groups(self):
        """Number of row groups reported by the underlying reader."""
        return self.reader.get().num_row_groups()

    def set_use_threads(self, bint use_threads):
        """
        Enable or disable multi-threaded reading.

        When threading is not enabled, threads are always switched off,
        whatever value is passed.

        Parameters
        ----------
        use_threads : bool
        """
        if is_threading_enabled():
            self.reader.get().set_use_threads(use_threads)
        else:
            self.reader.get().set_use_threads(False)

    def set_batch_size(self, int64_t batch_size):
        """
        Set the batch size on the underlying reader.

        Parameters
        ----------
        batch_size : int64
        """
        self.reader.get().set_batch_size(batch_size)

    def iter_batches(self, int64_t batch_size, row_groups, column_indices=None,
                     bint use_threads=True):
        """
        Iterate over the record batches of the selected row groups.

        Parameters
        ----------
        batch_size : int64
        row_groups : list[int]
        column_indices : list[int], optional
            If None, all columns are read.
        use_threads : bool, default True

        Yields
        ------
        next : RecordBatch
        """
        cdef:
            vector[int] c_row_groups
            vector[int] c_column_indices
            shared_ptr[CRecordBatch] record_batch
            UniquePtrNoGIL[CRecordBatchReader] recordbatchreader

        self.set_batch_size(batch_size)

        # Always apply the requested setting, as read_row_groups() and
        # read_all() do; otherwise use_threads=False would leave threading
        # enabled by an earlier call untouched.
        self.set_use_threads(use_threads)

        for row_group in row_groups:
            c_row_groups.push_back(row_group)

        if column_indices is not None:
            for index in column_indices:
                c_column_indices.push_back(index)
            with nogil:
                check_status(
                    self.reader.get().GetRecordBatchReader(
                        c_row_groups, c_column_indices, &recordbatchreader
                    )
                )
        else:
            with nogil:
                check_status(
                    self.reader.get().GetRecordBatchReader(
                        c_row_groups, &recordbatchreader
                    )
                )

        while True:
            with nogil:
                check_status(
                    recordbatchreader.get().ReadNext(&record_batch)
                )
            # A null batch marks the end of the stream.
            if record_batch.get() == NULL:
                break

            yield pyarrow_wrap_batch(record_batch)

    def read_row_group(self, int i, column_indices=None,
                       bint use_threads=True):
        """
        Read a single row group; delegates to :meth:`read_row_groups`.

        Parameters
        ----------
        i : int
        column_indices : list[int], optional
        use_threads : bool, default True

        Returns
        -------
        table : pyarrow.Table
        """
        return self.read_row_groups([i], column_indices, use_threads)

    def read_row_groups(self, row_groups not None, column_indices=None,
                        bint use_threads=True):
        """
        Read the given row groups into a table.

        Parameters
        ----------
        row_groups : list[int]
        column_indices : list[int], optional
            If None, all columns are read.
        use_threads : bool, default True

        Returns
        -------
        table : pyarrow.Table
        """
        cdef:
            shared_ptr[CTable] ctable
            vector[int] c_row_groups
            vector[int] c_column_indices

        self.set_use_threads(use_threads)

        for row_group in row_groups:
            c_row_groups.push_back(row_group)

        if column_indices is not None:
            for index in column_indices:
                c_column_indices.push_back(index)

            with nogil:
                check_status(self.reader.get()
                             .ReadRowGroups(c_row_groups, c_column_indices,
                                            &ctable))
        else:
            # Read all columns
            with nogil:
                check_status(self.reader.get()
                             .ReadRowGroups(c_row_groups, &ctable))
        return pyarrow_wrap_table(ctable)

    def read_all(self, column_indices=None, bint use_threads=True):
        """
        Read the whole file into a table.

        Parameters
        ----------
        column_indices : list[int], optional
            If None, all columns are read.
        use_threads : bool, default True

        Returns
        -------
        table : pyarrow.Table
        """
        cdef:
            shared_ptr[CTable] ctable
            vector[int] c_column_indices

        self.set_use_threads(use_threads)

        if column_indices is not None:
            for index in column_indices:
                c_column_indices.push_back(index)

            with nogil:
                check_status(self.reader.get()
                             .ReadTable(c_column_indices, &ctable))
        else:
            # Read all columns
            with nogil:
                check_status(self.reader.get()
                             .ReadTable(&ctable))
        return pyarrow_wrap_table(ctable)

    def scan_contents(self, column_indices=None, batch_size=65536):
        """
        Scan the file contents and return the row count from the reader.

        Parameters
        ----------
        column_indices : list[int], optional
        batch_size : int32, default 65536

        Returns
        -------
        num_rows : int64
        """
        cdef:
            vector[int] c_column_indices
            int32_t c_batch_size
            int64_t c_num_rows

        if column_indices is not None:
            for index in column_indices:
                c_column_indices.push_back(index)

        c_batch_size = batch_size

        with nogil:
            check_status(self.reader.get()
                         .ScanContents(c_column_indices, c_batch_size,
                                       &c_num_rows))

        return c_num_rows

    def column_name_idx(self, column_name):
        """
        Find the index of a column by its name.

        Parameters
        ----------
        column_name : str
            Name of the column; separation of nesting levels is done via ".".

        Returns
        -------
        column_idx : int
            Integer index of the column in the schema.

        Raises
        ------
        KeyError
            If no column with that name exists.
        """
        cdef:
            FileMetaData container = self.metadata
            const CFileMetaData* metadata = container._metadata
            int i = 0

        # The name -> index map is built on first use and then reused.
        if self._column_idx_map is None:
            self._column_idx_map = {}
            for i in range(0, metadata.num_columns()):
                col_bytes = tobytes(metadata.schema().Column(i)
                                    .path().get().ToDotString())
                self._column_idx_map[col_bytes] = i

        return self._column_idx_map[tobytes(column_name)]

    def read_column(self, int column_index):
        """
        Read the column at the specified index.

        Parameters
        ----------
        column_index : int
            Index of the column.

        Returns
        -------
        column : pyarrow.ChunkedArray
        """
        cdef shared_ptr[CChunkedArray] out
        with nogil:
            check_status(self.reader.get()
                         .ReadColumn(column_index, &out))
        return pyarrow_wrap_chunked_array(out)

    def close(self):
        """Close the underlying file handle unless it is already closed."""
        if not self.closed:
            with nogil:
                check_status(self.rd_handle.get().Close())

    @property
    def closed(self):
        """True if there is no file handle or the handle reports closed."""
        if self.rd_handle == NULL:
            return True
        with nogil:
            closed = self.rd_handle.get().closed()
        return closed
|
| 1814 |
+
|
| 1815 |
+
|
| 1816 |
+
cdef CSortingColumn _convert_sorting_column(SortingColumn sorting_column):
    """Copy the fields of a SortingColumn into a CSortingColumn struct."""
    cdef CSortingColumn converted

    converted.nulls_first = sorting_column.nulls_first
    converted.descending = sorting_column.descending
    converted.column_idx = sorting_column.column_index

    return converted
|
| 1824 |
+
|
| 1825 |
+
|
| 1826 |
+
cdef vector[CSortingColumn] _convert_sorting_columns(sorting_columns) except *:
    """
    Convert a sequence of SortingColumn objects into a C++ vector.

    Raises ValueError unless ``sorting_columns`` is a Sequence whose items
    are all SortingColumn instances.
    """
    cdef vector[CSortingColumn] c_sorting_columns

    if (not isinstance(sorting_columns, Sequence)
            or not all(isinstance(col, SortingColumn)
                       for col in sorting_columns)):
        raise ValueError(
            "'sorting_columns' must be a list of `SortingColumn`")

    for col in sorting_columns:
        c_sorting_columns.push_back(_convert_sorting_column(col))

    return c_sorting_columns
|
| 1836 |
+
|
| 1837 |
+
|
| 1838 |
+
cdef shared_ptr[WriterProperties] _create_writer_properties(
        use_dictionary=None,
        compression=None,
        version=None,
        write_statistics=None,
        data_page_size=None,
        compression_level=None,
        use_byte_stream_split=False,
        column_encoding=None,
        data_page_version=None,
        FileEncryptionProperties encryption_properties=None,
        write_batch_size=None,
        dictionary_pagesize_limit=None,
        write_page_index=False,
        write_page_checksum=False,
        sorting_columns=None,
        store_decimal_as_integer=False) except *:

    """
    Build the general Parquet writer properties.

    Each argument is translated into the matching call on a
    ``WriterProperties.Builder``. Options left at None are not passed to
    the builder. Several options accept either a single value applied to
    all columns or a per-column collection:

    - ``compression`` : str, or mapping of column name to codec name
    - ``compression_level`` : int, or mapping of column name to level
    - ``use_dictionary`` : bool, or iterable of column names
    - ``write_statistics`` : bool, or iterable of column names
    - ``use_byte_stream_split`` : bool, or iterable of column names
    - ``column_encoding`` : str, or dict of column name to encoding name

    Raises ValueError for unsupported versions and for conflicting
    encoding options, and TypeError when ``store_decimal_as_integer`` is
    not a bool or ``column_encoding`` is neither a dict nor a str.
    """
    cdef:
        shared_ptr[WriterProperties] properties
        WriterProperties.Builder props

    # data_page_version

    if data_page_version is not None:
        if data_page_version == "1.0":
            props.data_page_version(ParquetDataPageVersion_V1)
        elif data_page_version == "2.0":
            props.data_page_version(ParquetDataPageVersion_V2)
        else:
            raise ValueError("Unsupported Parquet data page version: {0}"
                             .format(data_page_version))

    # version

    if version is not None:
        if version == "1.0":
            props.version(ParquetVersion_V1)
        elif version in ("2.0", "pseudo-2.0"):
            warnings.warn(
                "Parquet format '2.0' pseudo version is deprecated, use "
                "'2.4' or '2.6' for fine-grained feature selection",
                FutureWarning, stacklevel=2)
            props.version(ParquetVersion_V2_0)
        elif version == "2.4":
            props.version(ParquetVersion_V2_4)
        elif version == "2.6":
            props.version(ParquetVersion_V2_6)
        else:
            raise ValueError("Unsupported Parquet format version: {0}"
                             .format(version))

    # compression

    if isinstance(compression, basestring):
        check_compression_name(compression)
        props.compression(compression_from_name(compression))
    elif compression is not None:
        # Per-column codecs: mapping of column name to codec name.
        for column, codec in compression.items():
            check_compression_name(codec)
            props.compression(tobytes(column), compression_from_name(codec))

    if isinstance(compression_level, int):
        props.compression_level(compression_level)
    elif compression_level is not None:
        # Per-column levels: mapping of column name to level.
        for column, level in compression_level.items():
            props.compression_level(tobytes(column), level)

    # use_dictionary

    if isinstance(use_dictionary, bool):
        if use_dictionary:
            props.enable_dictionary()
            if column_encoding is not None:
                raise ValueError(
                    "To use 'column_encoding' set 'use_dictionary' to False")
        else:
            props.disable_dictionary()
    elif use_dictionary is not None:
        # Deactivate dictionary encoding by default
        props.disable_dictionary()
        for column in use_dictionary:
            props.enable_dictionary(tobytes(column))
            if (column_encoding is not None and
                    column_encoding.get(column) is not None):
                raise ValueError(
                    "To use 'column_encoding' set 'use_dictionary' to False")

    # write_statistics

    if isinstance(write_statistics, bool):
        if write_statistics:
            props.enable_statistics()
        else:
            props.disable_statistics()
    elif write_statistics is not None:
        # Deactivate statistics by default and enable for specified columns
        props.disable_statistics()
        for column in write_statistics:
            props.enable_statistics(tobytes(column))

    # sorting_columns

    if sorting_columns is not None:
        props.set_sorting_columns(_convert_sorting_columns(sorting_columns))

    # use_byte_stream_split

    if isinstance(use_byte_stream_split, bool):
        if use_byte_stream_split:
            if column_encoding is not None:
                raise ValueError(
                    "'use_byte_stream_split' cannot be passed "
                    "together with 'column_encoding'")
            else:
                props.encoding(ParquetEncoding_BYTE_STREAM_SPLIT)
    elif use_byte_stream_split is not None:
        # Entries are added to column_encoding below; work on a private
        # copy so that the dict passed in by the caller is never modified.
        if isinstance(column_encoding, dict):
            column_encoding = dict(column_encoding)
        for column in use_byte_stream_split:
            if column_encoding is None:
                column_encoding = {column: 'BYTE_STREAM_SPLIT'}
            elif column_encoding.get(column, None) is None:
                column_encoding[column] = 'BYTE_STREAM_SPLIT'
            else:
                raise ValueError(
                    "'use_byte_stream_split' cannot be passed "
                    "together with 'column_encoding'")

    # store_decimal_as_integer

    if isinstance(store_decimal_as_integer, bool):
        if store_decimal_as_integer:
            props.enable_store_decimal_as_integer()
        else:
            props.disable_store_decimal_as_integer()
    else:
        raise TypeError("'store_decimal_as_integer' must be a boolean")

    # column_encoding
    # encoding map - encode individual columns

    if column_encoding is not None:
        if isinstance(column_encoding, dict):
            for column, _encoding in column_encoding.items():
                props.encoding(tobytes(column),
                               encoding_enum_from_name(_encoding))
        elif isinstance(column_encoding, str):
            props.encoding(encoding_enum_from_name(column_encoding))
        else:
            raise TypeError(
                "'column_encoding' should be a dictionary or a string")

    if data_page_size is not None:
        props.data_pagesize(data_page_size)

    if write_batch_size is not None:
        props.write_batch_size(write_batch_size)

    if dictionary_pagesize_limit is not None:
        props.dictionary_pagesize_limit(dictionary_pagesize_limit)

    # encryption

    if encryption_properties is not None:
        props.encryption(
            (<FileEncryptionProperties>encryption_properties).unwrap())

    # For backwards compatibility reasons we cap the maximum row group size
    # at 64Mi rows.  This could be changed in the future, though it would be
    # a breaking change.
    #
    # The user can always specify a smaller row group size (and the default
    # is smaller) when calling write_table.  If the call to write_table uses
    # a size larger than this then it will be latched to this value.
    props.max_row_group_length(_MAX_ROW_GROUP_SIZE)

    # checksum

    if write_page_checksum:
        props.enable_page_checksum()
    else:
        props.disable_page_checksum()

    # page index

    if write_page_index:
        props.enable_write_page_index()
    else:
        props.disable_write_page_index()

    properties = props.build()

    return properties
|
| 2031 |
+
|
| 2032 |
+
|
| 2033 |
+
cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
        use_deprecated_int96_timestamps=False,
        coerce_timestamps=None,
        allow_truncated_timestamps=False,
        writer_engine_version=None,
        use_compliant_nested_type=True,
        store_schema=True) except *:
    """
    Build the Arrow-specific writer properties.

    Parameters
    ----------
    use_deprecated_int96_timestamps : bool, default False
        Enable or disable the deprecated INT96 timestamp setting on the builder.
    coerce_timestamps : str, default None
        ``'ms'`` or ``'us'`` to coerce timestamps to that unit; ``None``
        leaves the builder untouched.
    allow_truncated_timestamps : bool, default False
        Allow or disallow truncated timestamps on the builder.
    writer_engine_version : str, default None
        ``'V1'`` (emits a warning), ``'V2'`` or ``None``. ``None`` is treated
        like ``'V2'``: in both cases the builder's engine version is not set.
    use_compliant_nested_type : bool, default True
        Enable or disable compliant nested types on the builder.
    store_schema : bool, default True
        Ask the builder to store the original Arrow schema.

    Returns
    -------
    shared_ptr[ArrowWriterProperties]

    Raises
    ------
    ValueError
        If ``coerce_timestamps`` or ``writer_engine_version`` has an
        unsupported value.
    """
    cdef:
        shared_ptr[ArrowWriterProperties] arrow_properties
        ArrowWriterProperties.Builder arrow_props

    # Store the original Arrow schema so things like dictionary types can
    # be automatically reconstructed
    if store_schema:
        arrow_props.store_schema()

    # int96 support

    if use_deprecated_int96_timestamps:
        arrow_props.enable_deprecated_int96_timestamps()
    else:
        arrow_props.disable_deprecated_int96_timestamps()

    # coerce_timestamps

    if coerce_timestamps == 'ms':
        arrow_props.coerce_timestamps(TimeUnit_MILLI)
    elif coerce_timestamps == 'us':
        arrow_props.coerce_timestamps(TimeUnit_MICRO)
    elif coerce_timestamps is not None:
        raise ValueError('Invalid value for coerce_timestamps: {0}'
                         .format(coerce_timestamps))

    # allow_truncated_timestamps

    if allow_truncated_timestamps:
        arrow_props.allow_truncated_timestamps()
    else:
        arrow_props.disallow_truncated_timestamps()

    # use_compliant_nested_type

    if use_compliant_nested_type:
        arrow_props.enable_compliant_nested_types()
    else:
        arrow_props.disable_compliant_nested_types()

    # writer_engine_version

    if writer_engine_version == "V1":
        warnings.warn("V1 parquet writer engine is a no-op. Use V2.")
        arrow_props.set_engine_version(ArrowWriterEngineVersion.V1)
    elif writer_engine_version is not None and writer_engine_version != "V2":
        # None is the declared default of this function; it must not be
        # rejected. Like "V2", it leaves the builder's engine version alone.
        raise ValueError("Unsupported Writer Engine Version: {0}"
                         .format(writer_engine_version))

    arrow_properties = arrow_props.build()

    return arrow_properties
|
| 2093 |
+
|
| 2094 |
+
cdef _name_to_index_map(Schema arrow_schema):
    """
    Map each Parquet leaf column's dotted path to its column index.

    The Arrow schema is converted to a Parquet schema using default writer
    properties and V2 Arrow writer properties; the returned dict is keyed by
    the dot-joined column path and holds the column's position.
    """
    cdef:
        shared_ptr[CSchema] sp_arrow_schema
        shared_ptr[SchemaDescriptor] sp_parquet_schema
        shared_ptr[WriterProperties] props = _create_writer_properties()
        shared_ptr[ArrowWriterProperties] arrow_props = _create_arrow_writer_properties(
            use_deprecated_int96_timestamps=False,
            coerce_timestamps=None,
            allow_truncated_timestamps=False,
            writer_engine_version="V2"
        )
        SchemaDescriptor* parquet_schema

    sp_arrow_schema = pyarrow_unwrap_schema(arrow_schema)

    # The schema conversion is pure C++ work, so the GIL is released for it.
    with nogil:
        check_status(ToParquetSchema(
            sp_arrow_schema.get(), deref(props.get()),
            deref(arrow_props.get()), &sp_parquet_schema))

    parquet_schema = sp_parquet_schema.get()

    index_by_name = {}
    for col in range(parquet_schema.num_columns()):
        dotted_path = frombytes(
            parquet_schema.Column(col).path().get().ToDotString())
        index_by_name[dotted_path] = col

    return index_by_name
|
| 2121 |
+
|
| 2122 |
+
|
| 2123 |
+
cdef class ParquetWriter(_Weakrefable):
    """
    Low-level Parquet file writer wrapping a C++ ``FileWriter``.

    The destination is either a path, which is opened here and closed by
    ``close()``, or an existing writable object, which is wrapped and left
    open by ``close()``.
    """
    cdef:
        unique_ptr[FileWriter] writer
        shared_ptr[COutputStream] sink
        # True when the sink was opened from a path in __cinit__.
        bint own_sink

    # NOTE(review): none of these readonly attributes is assigned in the
    # __cinit__ visible in this block.
    cdef readonly:
        object use_dictionary
        object use_deprecated_int96_timestamps
        object use_byte_stream_split
        object column_encoding
        object coerce_timestamps
        object allow_truncated_timestamps
        object compression
        object compression_level
        object data_page_version
        object use_compliant_nested_type
        object version
        object write_statistics
        object writer_engine_version
        int row_group_size
        int64_t data_page_size
        FileEncryptionProperties encryption_properties
        int64_t write_batch_size
        int64_t dictionary_pagesize_limit
        object store_schema
        object store_decimal_as_integer

    def __cinit__(self, where, Schema schema not None, use_dictionary=None,
                  compression=None, version=None,
                  write_statistics=None,
                  MemoryPool memory_pool=None,
                  use_deprecated_int96_timestamps=False,
                  coerce_timestamps=None,
                  data_page_size=None,
                  allow_truncated_timestamps=False,
                  compression_level=None,
                  use_byte_stream_split=False,
                  column_encoding=None,
                  writer_engine_version=None,
                  data_page_version=None,
                  use_compliant_nested_type=True,
                  encryption_properties=None,
                  write_batch_size=None,
                  dictionary_pagesize_limit=None,
                  store_schema=True,
                  write_page_index=False,
                  write_page_checksum=False,
                  sorting_columns=None,
                  store_decimal_as_integer=False):
        cdef:
            shared_ptr[WriterProperties] properties
            shared_ptr[ArrowWriterProperties] arrow_properties
            c_string c_where
            CMemoryPool* pool

        # A TypeError from _stringify_path means `where` is not path-like;
        # it is then wrapped as an output stream that the caller still owns.
        try:
            where = _stringify_path(where)
        except TypeError:
            get_writer(where, &self.sink)
            self.own_sink = False
        else:
            c_where = tobytes(where)
            with nogil:
                self.sink = GetResultValue(FileOutputStream.Open(c_where))
            self.own_sink = True

        # Parquet-level settings.
        properties = _create_writer_properties(
            use_dictionary=use_dictionary,
            compression=compression,
            version=version,
            write_statistics=write_statistics,
            data_page_size=data_page_size,
            compression_level=compression_level,
            use_byte_stream_split=use_byte_stream_split,
            column_encoding=column_encoding,
            data_page_version=data_page_version,
            encryption_properties=encryption_properties,
            write_batch_size=write_batch_size,
            dictionary_pagesize_limit=dictionary_pagesize_limit,
            write_page_index=write_page_index,
            write_page_checksum=write_page_checksum,
            sorting_columns=sorting_columns,
            store_decimal_as_integer=store_decimal_as_integer,
        )
        # Arrow-level settings.
        arrow_properties = _create_arrow_writer_properties(
            use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
            coerce_timestamps=coerce_timestamps,
            allow_truncated_timestamps=allow_truncated_timestamps,
            writer_engine_version=writer_engine_version,
            use_compliant_nested_type=use_compliant_nested_type,
            store_schema=store_schema,
        )

        pool = maybe_unbox_memory_pool(memory_pool)
        with nogil:
            self.writer = move(GetResultValue(
                FileWriter.Open(deref(schema.schema), pool,
                                self.sink, properties, arrow_properties)))

    def close(self):
        """Close the file writer and, when this object opened it, the sink."""
        with nogil:
            check_status(self.writer.get().Close())
            if self.own_sink:
                check_status(self.sink.get().Close())

    def write_table(self, Table table, row_group_size=None):
        """
        Write ``table`` through the underlying file writer.

        ``row_group_size`` of ``None`` or ``-1`` selects the smaller of the
        table's row count and ``_DEFAULT_ROW_GROUP_SIZE``; ``0`` raises
        ValueError; any other value is passed through unchanged.
        """
        cdef:
            CTable* ctable = table.table
            int64_t c_row_group_size

        if row_group_size == 0:
            raise ValueError('Row group size cannot be 0')

        if row_group_size is None or row_group_size == -1:
            c_row_group_size = min(ctable.num_rows(), _DEFAULT_ROW_GROUP_SIZE)
        else:
            c_row_group_size = row_group_size

        with nogil:
            check_status(self.writer.get()
                         .WriteTable(deref(ctable), c_row_group_size))

    def add_key_value_metadata(self, key_value_metadata):
        """Wrap the argument in ``KeyValueMetadata`` and hand it to the writer."""
        cdef:
            shared_ptr[const CKeyValueMetadata] c_metadata

        py_metadata = KeyValueMetadata(key_value_metadata)
        c_metadata = pyarrow_unwrap_metadata(py_metadata)
        with nogil:
            check_status(self.writer.get()
                         .AddKeyValueMetadata(c_metadata))

    @property
    def metadata(self):
        """
        The written file's ``FileMetaData``.

        Raises RuntimeError while the writer reports no metadata.
        """
        cdef:
            shared_ptr[CFileMetaData] c_file_metadata
            FileMetaData wrapped

        with nogil:
            c_file_metadata = self.writer.get().metadata()

        if c_file_metadata.get() == NULL:
            raise RuntimeError(
                'file metadata is only available after writer close')

        wrapped = FileMetaData()
        wrapped.init(c_file_metadata)
        return wrapped
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_pyarrow_cpp_tests.pxd
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# distutils: language = c++
|
| 19 |
+
# cython: language_level = 3
|
| 20 |
+
|
| 21 |
+
from pyarrow.includes.common cimport *
|
| 22 |
+
from pyarrow.includes.libarrow cimport CStatus
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Function type of one C++ test: takes no arguments and returns a CStatus.
ctypedef CStatus cb_test_func()

cdef extern from "arrow/python/python_test.h" namespace "arrow::py::testing" nogil:

    # One C++ test case: its name and the function that runs it.
    cdef cppclass CTestCase "arrow::py::testing::TestCase":
        c_string name
        cb_test_func func

    # Returns the C++ test cases as a vector of CTestCase.
    vector[CTestCase] GetCppTestCases()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_substrait.pyx
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: language_level = 3
|
| 19 |
+
from cython.operator cimport dereference as deref
|
| 20 |
+
from libcpp.vector cimport vector as std_vector
|
| 21 |
+
|
| 22 |
+
from pyarrow import Buffer, py_buffer
|
| 23 |
+
from pyarrow._compute cimport Expression
|
| 24 |
+
from pyarrow.lib import frombytes, tobytes
|
| 25 |
+
from pyarrow.lib cimport *
|
| 26 |
+
from pyarrow.includes.libarrow cimport *
|
| 27 |
+
from pyarrow.includes.libarrow_substrait cimport *
|
| 28 |
+
|
| 29 |
+
try:
|
| 30 |
+
import substrait as py_substrait
|
| 31 |
+
except ImportError:
|
| 32 |
+
py_substrait = None
|
| 33 |
+
else:
|
| 34 |
+
import substrait.proto # no-cython-lint
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# TODO GH-37235: Fix exception handling
|
| 38 |
+
cdef CDeclaration _create_named_table_provider(
    dict named_args, const std_vector[c_string]& names, const CSchema& schema
) noexcept:
    """
    Resolve a named table through the user callback stored in ``named_args``.

    ``named_args["provider"]`` is called with the table name parts (as a list
    of str) and the expected schema; the table it returns becomes the input
    of a "table_source" declaration.
    """
    cdef:
        shared_ptr[CTable] c_in_table
        shared_ptr[CTableSourceNodeOptions] c_tablesourceopts
        shared_ptr[CExecNodeOptions] c_input_node_opts
        # Deliberately left empty: the declaration has no inputs.
        vector[CDeclaration.Input] no_c_inputs

    # Decode the C++ name parts into Python strings for the callback.
    py_names = [frombytes(names[i]) for i in range(names.size())]
    py_schema = pyarrow_wrap_schema(make_shared[CSchema](schema))

    provider = named_args["provider"]
    py_table = provider(py_names, py_schema)

    c_in_table = pyarrow_unwrap_table(py_table)
    c_tablesourceopts = make_shared[CTableSourceNodeOptions](c_in_table)
    c_input_node_opts = static_pointer_cast[CExecNodeOptions, CTableSourceNodeOptions](
        c_tablesourceopts)
    return CDeclaration(tobytes("table_source"),
                        no_c_inputs, c_input_node_opts)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def run_query(plan, *, table_provider=None, use_threads=True):
    """
    Execute a Substrait plan and read the results as a RecordBatchReader.

    Parameters
    ----------
    plan : Union[Buffer, bytes, memoryview]
        The serialized Substrait plan to execute.
    table_provider : object (optional)
        A function to resolve any NamedTable relation to a table.
        The function will receive two arguments which will be a list
        of strings representing the table name and a pyarrow.Schema representing
        the expected schema and should return a pyarrow.Table.
    use_threads : bool, default True
        If True then multiple threads will be used to run the query.  If False then
        all CPU intensive work will be done on the calling thread.

    Returns
    -------
    RecordBatchReader
        A reader containing the result of the executed query

    Raises
    ------
    TypeError
        If ``plan`` is not a Buffer, bytes or memoryview.

    Examples
    --------
    >>> import pyarrow as pa
    >>> from pyarrow.lib import tobytes
    >>> import pyarrow.substrait as substrait
    >>> test_table_1 = pa.Table.from_pydict({"x": [1, 2, 3]})
    >>> test_table_2 = pa.Table.from_pydict({"x": [4, 5, 6]})
    >>> def table_provider(names, schema):
    ...     if not names:
    ...         raise Exception("No names provided")
    ...     elif names[0] == "t1":
    ...         return test_table_1
    ...     elif names[0] == "t2":
    ...         return test_table_2
    ...     else:
    ...         raise Exception("Unrecognized table name")
    ...
    >>> substrait_query = '''
    ...     {
    ...         "relations": [
    ...         {"rel": {
    ...             "read": {
    ...             "base_schema": {
    ...                 "struct": {
    ...                 "types": [
    ...                             {"i64": {}}
    ...                         ]
    ...                 },
    ...                 "names": [
    ...                         "x"
    ...                         ]
    ...             },
    ...             "namedTable": {
    ...                     "names": ["t1"]
    ...             }
    ...             }
    ...         }}
    ...         ]
    ...     }
    ... '''
    >>> buf = pa._substrait._parse_json_plan(tobytes(substrait_query))
    >>> reader = pa.substrait.run_query(buf, table_provider=table_provider)
    >>> reader.read_all()
    pyarrow.Table
    x: int64
    ----
    x: [[1,2,3]]
    """

    cdef:
        CResult[shared_ptr[CRecordBatchReader]] c_res_reader
        shared_ptr[CRecordBatchReader] c_reader
        RecordBatchReader reader
        shared_ptr[CBuffer] c_buf_plan
        CConversionOptions c_conversion_options
        c_bool c_use_threads

    c_use_threads = use_threads
    if isinstance(plan, (bytes, memoryview)):
        # Wrap the raw bytes in a pyarrow Buffer before unwrapping to C++.
        c_buf_plan = pyarrow_unwrap_buffer(py_buffer(plan))
    elif isinstance(plan, Buffer):
        c_buf_plan = pyarrow_unwrap_buffer(plan)
    else:
        raise TypeError(
            f"Expected 'pyarrow.Buffer' or bytes, got '{type(plan)}'")

    if table_provider is not None:
        # The callback travels to the C++ side inside a dict that is bound
        # as the first argument of _create_named_table_provider.
        named_table_args = {
            "provider": table_provider
        }
        c_conversion_options.named_table_provider = BindFunction[CNamedTableProvider](
            &_create_named_table_provider, named_table_args)

    with nogil:
        c_res_reader = ExecuteSerializedPlan(
            deref(c_buf_plan), default_extension_id_registry(),
            GetFunctionRegistry(), c_conversion_options, c_use_threads)

    c_reader = GetResultValue(c_res_reader)

    # Bypass RecordBatchReader's Python-level constructor and attach the
    # C++ reader directly.
    reader = RecordBatchReader.__new__(RecordBatchReader)
    reader.reader = c_reader
    return reader
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def _parse_json_plan(plan):
    """
    Parse a JSON plan into equivalent serialized Protobuf.

    Parameters
    ----------
    plan : bytes
        Substrait plan in JSON.

    Returns
    -------
    Buffer
        A buffer containing the serialized Protobuf plan.
    """

    cdef:
        c_string c_json
        CResult[shared_ptr[CBuffer]] c_result
        shared_ptr[CBuffer] c_serialized

    # Implicit bytes -> std::string conversion.
    c_json = plan
    c_result = SerializeJsonPlan(c_json)
    with nogil:
        c_serialized = GetResultValue(c_result)
    return pyarrow_wrap_buffer(c_serialized)
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
class SubstraitSchema:
    """A Schema encoded for Substrait usage.

    The SubstraitSchema contains a schema represented
    both as a substrait ``NamedStruct`` and as an
    ``ExtendedExpression``.

    The ``ExtendedExpression`` is available for cases where types
    used by the schema require extensions to decode them.
    In such case the schema will be the ``base_schema`` of the
    ``ExtendedExpression`` and all extensions will be provided.
    """

    def __init__(self, schema, expression):
        # Both values are stored as given, without validation.
        self.schema = schema
        self.expression = expression

    def to_pysubstrait(self):
        """Convert the schema to a substrait-python ExtendedExpression object."""
        # py_substrait is None when the optional 'substrait' package
        # could not be imported at module load time.
        if py_substrait is not None:
            return py_substrait.proto.ExtendedExpression.FromString(self.expression)
        raise ImportError("The 'substrait' package is required.")
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def serialize_schema(schema):
    """
    Serialize a schema into a SubstraitSchema object.

    Parameters
    ----------
    schema : Schema
        The schema to serialize

    Returns
    -------
    SubstraitSchema
        The schema stored in a SubstraitSchema object.
    """
    # NamedStruct form first, then an ExtendedExpression with no expressions
    # that carries the schema with Arrow extensions allowed.
    named_struct = _serialize_namedstruct_schema(schema)
    extended_expr = serialize_expressions(
        [], [], schema, allow_arrow_extensions=True)
    return SubstraitSchema(schema=named_struct, expression=extended_expr)
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def _serialize_namedstruct_schema(schema):
    """
    Serialize ``schema`` with ``SerializeSchema`` and return a memoryview.

    Default conversion options and an empty extension set are used.
    NOTE(review): ``schema`` is cast to ``Schema`` without a type check.
    """
    cdef:
        CExtensionSet c_extensions
        CConversionOptions c_conversion_options
        CResult[shared_ptr[CBuffer]] c_result
        shared_ptr[CBuffer] c_serialized

    with nogil:
        c_result = SerializeSchema(
            deref((<Schema> schema).sp_schema),
            &c_extensions,
            c_conversion_options)
        c_serialized = GetResultValue(c_result)

    return memoryview(pyarrow_wrap_buffer(c_serialized))
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def deserialize_schema(buf):
    """
    Deserialize a ``NamedStruct`` Substrait message
    or a SubstraitSchema object into an Arrow Schema object

    Parameters
    ----------
    buf : Buffer or bytes or SubstraitSchema
        The message to deserialize

    Returns
    -------
    Schema
        The deserialized schema
    """
    cdef:
        shared_ptr[CBuffer] c_message
        CResult[shared_ptr[CSchema]] c_result
        shared_ptr[CSchema] c_schema
        CConversionOptions c_conversion_options
        CExtensionSet c_extensions

    # A SubstraitSchema is decoded through its ExtendedExpression form.
    if isinstance(buf, SubstraitSchema):
        bound = deserialize_expressions(buf.expression)
        return bound.schema

    if isinstance(buf, Buffer):
        c_message = pyarrow_unwrap_buffer(buf)
    elif isinstance(buf, (bytes, memoryview)):
        c_message = pyarrow_unwrap_buffer(py_buffer(buf))
    else:
        raise TypeError(
            f"Expected 'pyarrow.Buffer' or bytes, got '{type(buf)}'")

    with nogil:
        c_result = DeserializeSchema(
            deref(c_message), c_extensions, c_conversion_options)
        c_schema = GetResultValue(c_result)

    return pyarrow_wrap_schema(c_schema)
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def serialize_expressions(exprs, names, schema, *, allow_arrow_extensions=False):
    """
    Serialize a collection of expressions into Substrait

    Substrait expressions must be bound to a schema.  For example,
    the Substrait expression ``a:i32 + b:i32`` is different from the
    Substrait expression ``a:i64 + b:i64``.  Pyarrow expressions are
    typically unbound.  For example, both of the above expressions
    would be represented as ``a + b`` in pyarrow.

    This means a schema must be provided when serializing an expression.
    It also means that the serialization may fail if a matching function
    call cannot be found for the expression.

    Parameters
    ----------
    exprs : list of Expression
        The expressions to serialize
    names : list of str
        Names for the expressions
    schema : Schema
        The schema the expressions will be bound to
    allow_arrow_extensions : bool, default False
        If False then only functions that are part of the core Substrait function
        definitions will be allowed.  Set this to True to allow pyarrow-specific functions
        and user defined functions but the result may not be accepted by other
        compute libraries.

    Returns
    -------
    Buffer
        An ExtendedExpression message containing the serialized expressions
    """
    cdef:
        CResult[shared_ptr[CBuffer]] c_result
        shared_ptr[CBuffer] c_serialized
        CNamedExpression c_named_expr
        CBoundExpressions c_bound_exprs
        CConversionOptions c_conversion_options

    if len(exprs) != len(names):
        raise ValueError("exprs and names need to have the same length")

    # Validate each (expression, name) pair and copy it into the C++ vector.
    for py_expr, py_name in zip(exprs, names):
        if not isinstance(py_expr, Expression):
            raise TypeError(f"Expected Expression, got '{type(py_expr)}' in exprs")
        if not isinstance(py_name, str):
            raise TypeError(f"Expected str, got '{type(py_name)}' in names")
        c_named_expr.expression = (<Expression> py_expr).unwrap()
        c_named_expr.name = tobytes(<str> py_name)
        c_bound_exprs.named_expressions.push_back(c_named_expr)

    # NOTE(review): ``schema`` is cast to ``Schema`` without a type check.
    c_bound_exprs.schema = (<Schema> schema).sp_schema

    c_conversion_options.allow_arrow_extensions = allow_arrow_extensions

    with nogil:
        c_result = SerializeExpressions(c_bound_exprs, c_conversion_options)
        c_serialized = GetResultValue(c_result)
    return memoryview(pyarrow_wrap_buffer(c_serialized))
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
cdef class BoundExpressions(_Weakrefable):
    """
    A collection of named expressions and the schema they are bound to

    This is equivalent to the Substrait ExtendedExpression message
    """

    cdef:
        CBoundExpressions c_bound_exprs

    def __init__(self):
        # Instances are created through the ``wrap`` static method, which
        # bypasses __init__ via __new__.
        msg = 'BoundExpressions is an abstract class thus cannot be initialized.'
        raise TypeError(msg)

    cdef void init(self, CBoundExpressions bound_expressions):
        self.c_bound_exprs = bound_expressions

    @property
    def schema(self):
        """
        The common schema that all expressions are bound to
        """
        return pyarrow_wrap_schema(self.c_bound_exprs.schema)

    @property
    def expressions(self):
        """
        A dict from expression name to expression
        """
        expr_dict = {}
        for named_expr in self.c_bound_exprs.named_expressions:
            name = frombytes(named_expr.name)
            expr = Expression.wrap(named_expr.expression)
            expr_dict[name] = expr
        return expr_dict

    @staticmethod
    cdef wrap(const CBoundExpressions& bound_expressions):
        cdef BoundExpressions self = BoundExpressions.__new__(BoundExpressions)
        self.init(bound_expressions)
        return self

    @classmethod
    def from_substrait(cls, message):
        """
        Convert a Substrait message into a BoundExpressions object

        Parameters
        ----------
        message : Buffer or bytes or protobuf Message
            The message to convert to a BoundExpressions object

        Returns
        -------
        BoundExpressions
            The converted expressions, their names, and the bound schema

        Raises
        ------
        TypeError
            If ``message`` is not a Buffer, bytes or memoryview and has no
            ``SerializeToString`` attribute.
        """
        if isinstance(message, (bytes, memoryview, Buffer)):
            return deserialize_expressions(message)

        # Only the missing attribute is reported as a type error. An
        # AttributeError raised inside SerializeToString() or during
        # deserialization now propagates unchanged.
        serialize = getattr(message, "SerializeToString", None)
        if serialize is None:
            raise TypeError(
                f"Expected 'pyarrow.Buffer' or bytes or protobuf Message, got '{type(message)}'")
        return deserialize_expressions(serialize())
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
def deserialize_expressions(buf):
    """
    Deserialize an ExtendedExpression Substrait message into a BoundExpressions object

    Parameters
    ----------
    buf : Buffer or bytes
        The message to deserialize

    Returns
    -------
    BoundExpressions
        The deserialized expressions, their names, and the bound schema
    """
    cdef:
        shared_ptr[CBuffer] c_message
        CResult[CBoundExpressions] c_result
        CBoundExpressions c_bound_exprs

    if isinstance(buf, Buffer):
        c_message = pyarrow_unwrap_buffer(buf)
    elif isinstance(buf, (bytes, memoryview)):
        # Raw bytes are wrapped in a pyarrow Buffer first.
        c_message = pyarrow_unwrap_buffer(py_buffer(buf))
    else:
        raise TypeError(
            f"Expected 'pyarrow.Buffer' or bytes, got '{type(buf)}'")

    with nogil:
        c_result = DeserializeExpressions(deref(c_message))
        c_bound_exprs = GetResultValue(c_result)

    return BoundExpressions.wrap(c_bound_exprs)
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
def get_supported_functions():
    """
    List the Substrait functions that the underlying engine
    currently supports.

    Returns
    -------
    list[str]
        A list of function ids encoded as '{uri}#{name}'
    """
    cdef:
        ExtensionIdRegistry* c_registry
        std_vector[c_string] c_function_ids

    c_registry = default_extension_id_registry()
    c_function_ids = c_registry.GetSupportedSubstraitFunctions()

    # Decode every C string id into a Python str.
    return [frombytes(c_function_id) for c_function_id in c_function_ids]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cffi.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
from __future__ import absolute_import
|
| 19 |
+
|
| 20 |
+
import cffi
|
| 21 |
+
|
| 22 |
+
# C declarations for the ArrowSchema, ArrowArray, ArrowArrayStream and
# ArrowDeviceArray structs. The text is handed verbatim to ``ffi.cdef`` below,
# so it must remain valid C that cffi can parse.
c_source = """
struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
int64_t flags;
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;

// Release callback
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};

struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
int64_t n_buffers;
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;

// Release callback
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};

struct ArrowArrayStream {
int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);

const char* (*get_last_error)(struct ArrowArrayStream*);

// Release callback
void (*release)(struct ArrowArrayStream*);
// Opaque producer-specific data
void* private_data;
};

typedef int32_t ArrowDeviceType;

struct ArrowDeviceArray {
struct ArrowArray array;
int64_t device_id;
ArrowDeviceType device_type;
void* sync_event;
int64_t reserved[3];
};
"""

# TODO use out-of-line mode for faster import and avoid C parsing
# Module-level FFI instance; the declarations above are parsed at import time.
ffi = cffi.FFI()
ffi.cdef(c_source)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/compute.py
ADDED
|
@@ -0,0 +1,744 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
from pyarrow._compute import ( # noqa
|
| 19 |
+
Function,
|
| 20 |
+
FunctionOptions,
|
| 21 |
+
FunctionRegistry,
|
| 22 |
+
HashAggregateFunction,
|
| 23 |
+
HashAggregateKernel,
|
| 24 |
+
Kernel,
|
| 25 |
+
ScalarAggregateFunction,
|
| 26 |
+
ScalarAggregateKernel,
|
| 27 |
+
ScalarFunction,
|
| 28 |
+
ScalarKernel,
|
| 29 |
+
VectorFunction,
|
| 30 |
+
VectorKernel,
|
| 31 |
+
# Option classes
|
| 32 |
+
ArraySortOptions,
|
| 33 |
+
AssumeTimezoneOptions,
|
| 34 |
+
CastOptions,
|
| 35 |
+
CountOptions,
|
| 36 |
+
CumulativeOptions,
|
| 37 |
+
CumulativeSumOptions,
|
| 38 |
+
DayOfWeekOptions,
|
| 39 |
+
DictionaryEncodeOptions,
|
| 40 |
+
RunEndEncodeOptions,
|
| 41 |
+
ElementWiseAggregateOptions,
|
| 42 |
+
ExtractRegexOptions,
|
| 43 |
+
FilterOptions,
|
| 44 |
+
IndexOptions,
|
| 45 |
+
JoinOptions,
|
| 46 |
+
ListSliceOptions,
|
| 47 |
+
ListFlattenOptions,
|
| 48 |
+
MakeStructOptions,
|
| 49 |
+
MapLookupOptions,
|
| 50 |
+
MatchSubstringOptions,
|
| 51 |
+
ModeOptions,
|
| 52 |
+
NullOptions,
|
| 53 |
+
PadOptions,
|
| 54 |
+
PairwiseOptions,
|
| 55 |
+
PartitionNthOptions,
|
| 56 |
+
QuantileOptions,
|
| 57 |
+
RandomOptions,
|
| 58 |
+
RankOptions,
|
| 59 |
+
ReplaceSliceOptions,
|
| 60 |
+
ReplaceSubstringOptions,
|
| 61 |
+
RoundBinaryOptions,
|
| 62 |
+
RoundOptions,
|
| 63 |
+
RoundTemporalOptions,
|
| 64 |
+
RoundToMultipleOptions,
|
| 65 |
+
ScalarAggregateOptions,
|
| 66 |
+
SelectKOptions,
|
| 67 |
+
SetLookupOptions,
|
| 68 |
+
SliceOptions,
|
| 69 |
+
SortOptions,
|
| 70 |
+
SplitOptions,
|
| 71 |
+
SplitPatternOptions,
|
| 72 |
+
StrftimeOptions,
|
| 73 |
+
StrptimeOptions,
|
| 74 |
+
StructFieldOptions,
|
| 75 |
+
TakeOptions,
|
| 76 |
+
TDigestOptions,
|
| 77 |
+
TrimOptions,
|
| 78 |
+
Utf8NormalizeOptions,
|
| 79 |
+
VarianceOptions,
|
| 80 |
+
WeekOptions,
|
| 81 |
+
# Functions
|
| 82 |
+
call_function,
|
| 83 |
+
function_registry,
|
| 84 |
+
get_function,
|
| 85 |
+
list_functions,
|
| 86 |
+
# Udf
|
| 87 |
+
call_tabular_function,
|
| 88 |
+
register_scalar_function,
|
| 89 |
+
register_tabular_function,
|
| 90 |
+
register_aggregate_function,
|
| 91 |
+
register_vector_function,
|
| 92 |
+
UdfContext,
|
| 93 |
+
# Expressions
|
| 94 |
+
Expression,
|
| 95 |
+
)
|
| 96 |
+
|
| 97 |
+
from collections import namedtuple
|
| 98 |
+
import inspect
|
| 99 |
+
from textwrap import dedent
|
| 100 |
+
import warnings
|
| 101 |
+
|
| 102 |
+
import pyarrow as pa
|
| 103 |
+
from pyarrow import _compute_docstrings
|
| 104 |
+
from pyarrow.vendored import docscrape
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _get_arg_names(func):
    """Return the argument names stored on ``func``'s ``_doc`` object."""
    function_doc = func._doc
    return function_doc.arg_names
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
# Record holding the parameter entries scraped from an options class
# docstring.
_OptionsClassDoc = namedtuple('_OptionsClassDoc', ['params'])


def _scrape_options_class_doc(options_class):
    """
    Parse the docstring of ``options_class`` and collect its parameters.

    Returns
    -------
    _OptionsClassDoc or None
        The parsed 'Parameters' section, or None when the class has no
        (or an empty) docstring.
    """
    raw_doc = options_class.__doc__
    if raw_doc:
        parsed = docscrape.NumpyDocString(raw_doc)
        return _OptionsClassDoc(parsed['Parameters'])
    return None
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def _decorate_compute_function(wrapper, exposed_name, func, options_class):
    """
    Decorate a compute function wrapper with metadata and documentation.

    Parameters
    ----------
    wrapper : callable
        The Python wrapper to decorate; it is modified in place.
    exposed_name : str
        Name assigned to the wrapper's ``__name__`` and ``__qualname__``.
    func : Function
        The compute function being wrapped; its ``_doc``, ``name``,
        ``arity`` and ``kind`` attributes are read.
    options_class : type or None
        Python options class of the function, if it has one.

    Returns
    -------
    callable
        The same ``wrapper`` object, now carrying the
        ``__arrow_compute_function__`` metadata dict and a generated
        numpydoc-style ``__doc__``.
    """
    cpp_doc = func._doc

    wrapper.__arrow_compute_function__ = dict(
        name=func.name,
        arity=func.arity,
        options_class=cpp_doc.options_class,
        options_required=cpp_doc.options_required)
    wrapper.__name__ = exposed_name
    wrapper.__qualname__ = exposed_name

    doc_pieces = []

    # 1. One-line summary
    summary = cpp_doc.summary
    if not summary:
        # A varargs function reports its arity as Ellipsis, which cannot be
        # ordered against an int; treat it as taking several arguments.
        takes_many = func.arity is Ellipsis or func.arity > 1
        arg_str = "arguments" if takes_many else "argument"
        summary = ("Call compute function {!r} with the given {}"
                   .format(func.name, arg_str))

    doc_pieces.append(f"{summary}.\n\n")

    # 2. Multi-line description
    description = cpp_doc.description
    if description:
        doc_pieces.append(f"{description}\n\n")

    doc_addition = _compute_docstrings.function_doc_additions.get(func.name)

    # 3. Parameter description
    doc_pieces.append(dedent("""\
        Parameters
        ----------
        """))

    # 3a. Compute function parameters
    # The accepted input type depends only on the function kind, so it is
    # computed once for all arguments.
    if func.kind in ('vector', 'scalar_aggregate'):
        arg_type = 'Array-like'
    else:
        arg_type = 'Array-like or scalar-like'
    for arg_name in _get_arg_names(func):
        doc_pieces.append(f"{arg_name} : {arg_type}\n")
        doc_pieces.append("    Argument to compute function.\n")

    # 3b. Compute function option values
    if options_class is not None:
        options_class_doc = _scrape_options_class_doc(options_class)
        if options_class_doc:
            for p in options_class_doc.params:
                doc_pieces.append(f"{p.name} : {p.type}\n")
                for s in p.desc:
                    doc_pieces.append(f"    {s}\n")
        else:
            # No docstring to scrape: fall back to the constructor signature.
            warnings.warn(f"Options class {options_class.__name__} "
                          f"does not have a docstring", RuntimeWarning)
            options_sig = inspect.signature(options_class)
            for p in options_sig.parameters.values():
                doc_pieces.append(dedent("""\
                {0} : optional
                    Parameter for {1} constructor. Either `options`
                    or `{0}` can be passed, but not both at the same time.
                """.format(p.name, options_class.__name__)))
        doc_pieces.append(dedent(f"""\
            options : pyarrow.compute.{options_class.__name__}, optional
                Alternative way of passing options.
            """))

    doc_pieces.append(dedent("""\
        memory_pool : pyarrow.MemoryPool, optional
            If not passed, will allocate memory from the default memory pool.
        """))

    # 4. Custom addition (e.g. examples)
    if doc_addition is not None:
        doc_pieces.append("\n{}\n".format(dedent(doc_addition).strip("\n")))

    wrapper.__doc__ = "".join(doc_pieces)
    return wrapper
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def _get_options_class(func):
    """
    Look up the Python options class named by ``func._doc.options_class``.

    Returns
    -------
    type or None
        The module-level object with that name. None when the function
        declares no options class, or when no such name exists in this
        module (a RuntimeWarning is emitted in that case).
    """
    class_name = func._doc.options_class
    if not class_name:
        return None

    module_namespace = globals()
    if class_name in module_namespace:
        return module_namespace[class_name]

    warnings.warn("Python binding for {} not exposed"
                  .format(class_name), RuntimeWarning)
    return None
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def _handle_options(name, options_class, options, args, kwargs):
    """
    Build or validate the options object for a compute function call.

    Parameters
    ----------
    name : str
        Function name, used only in error messages.
    options_class : type
        Options class expected by the function.
    options : options_class, dict or None
        Explicit options, either an instance or a dict of constructor
        keyword arguments.
    args, kwargs : tuple, dict
        Extra positional and keyword arguments to forward to the
        ``options_class`` constructor.

    Returns
    -------
    options_class instance or None
        None when neither ``options`` nor extra arguments were given.

    Raises
    ------
    TypeError
        If both ``options`` and extra arguments are passed, or if
        ``options`` is neither a dict nor an ``options_class`` instance.
    """
    has_extra_arguments = bool(args or kwargs)

    if has_extra_arguments and options is not None:
        raise TypeError(
            "Function {!r} called with both an 'options' argument "
            "and additional arguments"
            .format(name))
    if has_extra_arguments:
        return options_class(*args, **kwargs)

    if options is None:
        return None
    if isinstance(options, dict):
        return options_class(**options)
    if isinstance(options, options_class):
        return options
    raise TypeError(
        "Function {!r} expected a {} parameter, got {}"
        .format(name, options_class, type(options)))
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def _make_generic_wrapper(func_name, func, options_class, arity):
    """
    Create the Python callable that forwards to a compute function.

    Parameters
    ----------
    func_name : str
        Name used in error messages and when building expressions.
    func : Function
        Object whose ``call(args, options, memory_pool)`` is invoked.
    options_class : type or None
        Options class of the function; when None the wrapper accepts no
        option arguments.
    arity : int or Ellipsis
        Number of positional arguments; Ellipsis disables the count check.

    Returns
    -------
    callable
        A wrapper that returns an ``Expression`` call when its first
        argument is an ``Expression`` and otherwise calls ``func``.
    """
    checks_arity = arity is not Ellipsis

    def _arity_error(n_given):
        return TypeError(
            f"{func_name} takes {arity} positional argument(s), "
            f"but {n_given} were given"
        )

    if options_class is None:
        def wrapper(*args, memory_pool=None):
            if checks_arity and len(args) != arity:
                raise _arity_error(len(args))
            if args and isinstance(args[0], Expression):
                return Expression._call(func_name, list(args))
            return func.call(args, None, memory_pool)

        return wrapper

    def wrapper(*args, memory_pool=None, options=None, **kwargs):
        option_args = ()
        if checks_arity:
            if len(args) < arity:
                raise _arity_error(len(args))
            # Positional arguments beyond the arity are option values.
            args, option_args = args[:arity], args[arity:]
        options = _handle_options(func_name, options_class, options,
                                  option_args, kwargs)
        if args and isinstance(args[0], Expression):
            return Expression._call(func_name, list(args), options)
        return func.call(args, options, memory_pool)

    return wrapper
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def _make_signature(arg_names, var_arg_names, options_class):
    """
    Build the ``inspect.Signature`` advertised by a compute wrapper.

    Parameters
    ----------
    arg_names : iterable of str
        Names exposed as positional-only parameters.
    var_arg_names : iterable of str
        Names exposed as ``*args``-style parameters.
    options_class : type or None
        When given, its constructor parameters and a keyword-only
        ``options`` parameter are added to the signature.

    Returns
    -------
    inspect.Signature
        Always ends with a keyword-only ``memory_pool=None`` parameter.
    """
    from inspect import Parameter

    params = [Parameter(name, Parameter.POSITIONAL_ONLY)
              for name in arg_names]
    params.extend(Parameter(name, Parameter.VAR_POSITIONAL)
                  for name in var_arg_names)

    if options_class is not None:
        accepted_kinds = (Parameter.POSITIONAL_OR_KEYWORD,
                          Parameter.KEYWORD_ONLY)
        constructor_sig = inspect.signature(options_class)
        for option_param in constructor_sig.parameters.values():
            assert option_param.kind in accepted_kinds
            if var_arg_names:
                # Cannot have a positional argument after a *args
                option_param = option_param.replace(
                    kind=Parameter.KEYWORD_ONLY)
            params.append(option_param)
        params.append(Parameter("options", Parameter.KEYWORD_ONLY,
                                default=None))

    params.append(Parameter("memory_pool", Parameter.KEYWORD_ONLY,
                            default=None))
    return inspect.Signature(params)
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def _wrap_function(name, func):
    """
    Create the fully decorated Python wrapper for one compute function.

    The wrapper is built by ``_make_generic_wrapper``, given a signature
    from ``_make_signature`` and finally documented by
    ``_decorate_compute_function``.
    """
    options_class = _get_options_class(func)
    positional_names = _get_arg_names(func)

    # A trailing name starting with '*' denotes a variadic argument; move it
    # out of the positional list and strip the star(s).
    variadic_names = []
    if positional_names and positional_names[-1].startswith('*'):
        variadic_names.append(positional_names.pop().lstrip('*'))

    wrapper = _make_generic_wrapper(
        name, func, options_class, arity=func.arity)
    wrapper.__signature__ = _make_signature(
        positional_names, variadic_names, options_class)
    return _decorate_compute_function(wrapper, name, func, options_class)
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def _make_global_functions():
    """
    Make global functions wrapping each compute function.

    Every function listed by the registry is wrapped and stored in this
    module's globals, except the kinds skipped below.

    Note that some of the automatically-generated wrappers may be overridden
    by custom versions below.
    """
    module_globals = globals()
    registry = function_registry()

    # Avoid clashes with Python keywords
    keyword_safe_names = {'and': 'and_', 'or': 'or_'}

    for cpp_name in registry.list_functions():
        func = registry.get_function(cpp_name)

        # Hash aggregate functions are not callable, and nullary scalar
        # aggregate functions are not callable directly, so neither is
        # exposed at module level.
        if func.kind == "hash_aggregate":
            continue
        if func.kind == "scalar_aggregate" and func.arity == 0:
            continue

        name = keyword_safe_names.get(cpp_name, cpp_name)
        assert name not in module_globals, name
        wrapped = _wrap_function(name, func)
        # Register under the original name and the keyword-safe name (these
        # are the same key unless the name was rewritten).
        module_globals[cpp_name] = wrapped
        module_globals[name] = wrapped


_make_global_functions()
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def cast(arr, target_type=None, safe=None, options=None, memory_pool=None):
    """
    Cast array values to another data type. Can also be invoked as an array
    instance method.

    Parameters
    ----------
    arr : Array-like
    target_type : DataType or str
        Type to cast to
    safe : bool, default True
        Check for overflows or other unsafe conversions. Only an explicit
        ``False`` selects the unsafe cast.
    options : CastOptions, default None
        Additional checks pass by CastOptions. Cannot be combined with
        ``target_type`` or ``safe``.
    memory_pool : MemoryPool, optional
        memory pool to use for allocations during function execution.

    Returns
    -------
    casted : Array
        The cast result as a new Array

    Raises
    ------
    ValueError
        If ``options`` is passed together with ``target_type`` or ``safe``.

    Examples
    --------
    >>> from datetime import datetime
    >>> import pyarrow as pa
    >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
    >>> arr.type
    TimestampType(timestamp[us])

    You can use ``pyarrow.DataType`` objects to specify the target type:

    >>> cast(arr, pa.timestamp('ms'))
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]

    >>> cast(arr, pa.timestamp('ms')).type
    TimestampType(timestamp[ms])

    Alternatively, it is also supported to use the string aliases for these
    types:

    >>> arr.cast('timestamp[ms]')
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]
    >>> arr.cast('timestamp[ms]').type
    TimestampType(timestamp[ms])
    """
    if options is not None:
        if safe is not None or target_type is not None:
            raise ValueError("Must either pass values for 'target_type' and 'safe'"
                             " or pass a value for 'options'")
        cast_options = options
    else:
        resolved_type = pa.types.lib.ensure_type(target_type)
        build_options = (CastOptions.unsafe if safe is False
                         else CastOptions.safe)
        cast_options = build_options(resolved_type)

    return call_function("cast", [arr], cast_options, memory_pool)
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
def index(data, value, start=None, end=None, *, memory_pool=None):
    """
    Find the index of the first occurrence of a given value.

    Parameters
    ----------
    data : Array-like
    value : Scalar-like object
        The value to search for.
    start : int, optional
        Position at which the searched slice of ``data`` begins.
    end : int, optional
        Position at which the searched slice of ``data`` stops.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    index : int
        the index, or -1 if not found

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"])
    >>> pc.index(arr, "ipsum")
    <pyarrow.Int64Scalar: 1>
    >>> pc.index(arr, "ipsum", start=2)
    <pyarrow.Int64Scalar: 5>
    >>> pc.index(arr, "amet")
    <pyarrow.Int64Scalar: -1>
    """
    # Restrict the search to the requested window.
    if start is None:
        if end is not None:
            data = data.slice(0, end)
    elif end is None:
        data = data.slice(start)
    else:
        data = data.slice(start, end - start)

    # Make sure the needle is a scalar of the same type as the data.
    if isinstance(value, pa.Scalar):
        if data.type != value.type:
            value = pa.scalar(value.as_py(), type=data.type)
    else:
        value = pa.scalar(value, type=data.type)

    result = call_function('index', [data], IndexOptions(value=value),
                           memory_pool)

    # The match position is relative to the slice; shift it back so that it
    # refers to the original data.
    if start is not None:
        position = result.as_py()
        if position >= 0:
            result = pa.scalar(position + start, type=pa.int64())
    return result
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
def take(data, indices, *, boundscheck=True, memory_pool=None):
    """
    Select values (or records) from array- or table-like data given integer
    selection indices.

    The result will be of the same type(s) as the input, with elements taken
    from the input array (or record batch / table fields) at the given
    indices. If an index is null then the corresponding value in the output
    will be null.

    Parameters
    ----------
    data : Array, ChunkedArray, RecordBatch, or Table
    indices : Array, ChunkedArray
        Must be of integer type
    boundscheck : boolean, default True
        Whether to boundscheck the indices. If False and there is an out of
        bounds index, will likely cause the process to crash.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : depends on inputs
        Selected values for the given indices

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> indices = pa.array([0, None, 4, 3])
    >>> arr.take(indices)
    <pyarrow.lib.StringArray object at ...>
    [
      "a",
      null,
      "e",
      null
    ]
    """
    return call_function(
        'take',
        [data, indices],
        TakeOptions(boundscheck=boundscheck),
        memory_pool,
    )
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
def fill_null(values, fill_value):
    """Replace each null element in values with a corresponding
    element from fill_value.

    If fill_value is scalar-like, then every null element in values
    will be replaced with fill_value. If fill_value is array-like,
    then the i-th element in values will be replaced with the i-th
    element in fill_value.

    The fill_value's type must be the same as that of values, or it
    must be able to be implicitly casted to the array's type.

    This is an alias for :func:`coalesce`.

    Parameters
    ----------
    values : Array, ChunkedArray, or Scalar-like object
        Each null element is replaced with the corresponding value
        from fill_value.
    fill_value : Array, ChunkedArray, or Scalar-like object
        If not same type as values, will attempt to cast.

    Returns
    -------
    result : depends on inputs
        Values with all null elements replaced

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
    >>> fill_value = pa.scalar(5, type=pa.int8())
    >>> arr.fill_null(fill_value)
    <pyarrow.lib.Int8Array object at ...>
    [
      1,
      2,
      5,
      3
    ]
    >>> arr = pa.array([1, 2, None, 4, None])
    >>> arr.fill_null(pa.array([10, 20, 30, 40, 50]))
    <pyarrow.lib.Int64Array object at ...>
    [
      1,
      2,
      30,
      4,
      50
    ]
    """
    if isinstance(fill_value, (pa.Array, pa.ChunkedArray)):
        # Array-likes have no ``as_py``; cast them element-wise instead of
        # round-tripping through a Python scalar.
        if values.type != fill_value.type:
            fill_value = fill_value.cast(values.type)
    elif isinstance(fill_value, pa.Scalar):
        if values.type != fill_value.type:
            fill_value = pa.scalar(fill_value.as_py(), type=values.type)
    else:
        # Plain Python value: build a scalar of the target type directly.
        fill_value = pa.scalar(fill_value, type=values.type)

    return call_function("coalesce", [values, fill_value])
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
    """
    Select the indices of the top-k ordered elements from array- or table-like
    data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get top indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
        The passed object is never modified.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array
        Indices of the top-k ordered elements

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.top_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      5,
      4,
      2
    ]
    """
    # Work on a private copy so the caller's sequence is left untouched and
    # any list-like (tuple, generator, ...) is accepted.
    key_names = [] if sort_keys is None else list(sort_keys)
    if isinstance(values, (pa.Array, pa.ChunkedArray)):
        # Array input has no columns: add a placeholder key that only
        # carries the sort order.
        sort_keys = key_names + [("dummy", "descending")]
    else:
        sort_keys = [(key_name, "descending") for key_name in key_names]
    options = SelectKOptions(k, sort_keys)
    return call_function("select_k_unstable", [values], options, memory_pool)
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
    """
    Select the indices of the bottom-k ordered elements from
    array- or table-like data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get bottom indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
        The object passed by the caller is never modified.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array of indices
        Indices of the bottom-k ordered elements

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.bottom_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      0,
      1,
      2
    ]
    """
    if sort_keys is None:
        sort_keys = []
    if isinstance(values, (pa.Array, pa.ChunkedArray)):
        # Array-like input has no column names, so a placeholder key is used.
        # Build a new list instead of appending in place: appending would
        # mutate the caller's list (growing it on every call) and would fail
        # for list-like inputs without ``append`` such as tuples.
        sort_keys = [*sort_keys, ("dummy", "ascending")]
    else:
        sort_keys = [(key_name, "ascending") for key_name in sort_keys]
    options = SelectKOptions(k, sort_keys)
    return call_function("select_k_unstable", [values], options, memory_pool)
|
| 653 |
+
|
| 654 |
+
|
| 655 |
+
def random(n, *, initializer='system', options=None, memory_pool=None):
    """
    Generate numbers in the range [0, 1).

    Generated values are uniformly-distributed, double-precision
    in range [0, 1). Algorithm and seed can be changed via RandomOptions.

    Parameters
    ----------
    n : int
        Number of values to generate, must be greater than or equal to 0
    initializer : int or str
        How to initialize the underlying random generator.
        If an integer is given, it is used as a seed.
        If "system" is given, the random generator is initialized with
        a system-specific source of (hopefully true) randomness.
        Other values are invalid.
    options : pyarrow.compute.RandomOptions, optional
        Alternative way of passing options. When given, it is used as-is
        and `initializer` is ignored.
    memory_pool : pyarrow.MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.
    """
    # Only build options from `initializer` when the caller did not supply
    # an explicit RandomOptions; previously `options` was silently discarded.
    if options is None:
        options = RandomOptions(initializer=initializer)
    return call_function("random", [], options, memory_pool, length=n)
|
| 679 |
+
|
| 680 |
+
|
| 681 |
+
def field(*name_or_index):
    """Build an expression that references a (possibly nested) column.

    Only the field's name or index is stored. Type and other information
    is known only when the expression is bound to a dataset having an
    explicit schema.

    A nested reference is expressed either with several positional names
    or with a single tuple of names. For example ``('foo', 'bar')``
    references the field named "bar" inside the field named "foo".

    Parameters
    ----------
    *name_or_index : string, multiple strings, tuple or int
        The name or index of the (possibly nested) field the expression
        references to.

    Returns
    -------
    field_expr : Expression
        Reference to the given field

    Raises
    ------
    TypeError
        If a single argument is given that is neither a str, an int
        nor a tuple.

    Examples
    --------
    >>> import pyarrow.compute as pc
    >>> pc.field("a")
    <pyarrow.compute.Expression a>
    >>> pc.field(1)
    <pyarrow.compute.Expression FieldPath(1)>
    >>> pc.field(("a", "b"))
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    >>> pc.field("a", "b")
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    """
    if len(name_or_index) != 1:
        # Several positional parts (not wrapped in a tuple): the argument
        # tuple itself is the nested path.
        return Expression._nested_field(name_or_index)

    ref = name_or_index[0]
    if isinstance(ref, (str, int)):
        return Expression._field(ref)
    if isinstance(ref, tuple):
        return Expression._nested_field(ref)
    raise TypeError(
        "field reference should be str, multiple str, tuple or "
        f"integer, got {type(ref)}"
    )
|
| 728 |
+
|
| 729 |
+
|
| 730 |
+
def scalar(value):
    """Wrap a Python value into a scalar Expression.

    Parameters
    ----------
    value : bool, int, float or string
        Python value of the scalar. Note that only a subset of types are
        currently supported.

    Returns
    -------
    scalar_expr : Expression
        An Expression representing the scalar value
    """
    scalar_expr = Expression._scalar(value)
    return scalar_expr
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/config.pxi
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
from pyarrow.includes.libarrow cimport GetBuildInfo
|
| 19 |
+
|
| 20 |
+
from collections import namedtuple
|
| 21 |
+
import os
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
VersionInfo = namedtuple('VersionInfo', ('major', 'minor', 'patch'))
|
| 25 |
+
|
| 26 |
+
BuildInfo = namedtuple(
|
| 27 |
+
'BuildInfo',
|
| 28 |
+
('version', 'version_info', 'so_version', 'full_so_version',
|
| 29 |
+
'compiler_id', 'compiler_version', 'compiler_flags',
|
| 30 |
+
'git_id', 'git_description', 'package_kind', 'build_type'))
|
| 31 |
+
|
| 32 |
+
RuntimeInfo = namedtuple('RuntimeInfo',
|
| 33 |
+
('simd_level', 'detected_simd_level'))
|
| 34 |
+
|
| 35 |
+
cdef _build_info():
    # Collect the fields of the C++ build-info structure returned by
    # GetBuildInfo() into a Python-level BuildInfo namedtuple.
    cdef:
        const CBuildInfo* c_info

    c_info = &GetBuildInfo()

    # Numeric version triple, kept separately from the version string.
    version_triple = VersionInfo(c_info.version_major,
                                 c_info.version_minor,
                                 c_info.version_patch)

    return BuildInfo(
        version=frombytes(c_info.version_string),
        version_info=version_triple,
        so_version=frombytes(c_info.so_version),
        full_so_version=frombytes(c_info.full_so_version),
        compiler_id=frombytes(c_info.compiler_id),
        compiler_version=frombytes(c_info.compiler_version),
        compiler_flags=frombytes(c_info.compiler_flags),
        git_id=frombytes(c_info.git_id),
        git_description=frombytes(c_info.git_description),
        package_kind=frombytes(c_info.package_kind),
        # The build type is exposed lower-cased.
        build_type=frombytes(c_info.build_type).lower(),
    )
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
cpp_build_info = _build_info()
|
| 58 |
+
cpp_version = cpp_build_info.version
|
| 59 |
+
cpp_version_info = cpp_build_info.version_info
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def runtime_info():
    """
    Get runtime information.

    Returns
    -------
    info : pyarrow.RuntimeInfo
        Named tuple with the fields ``simd_level`` and
        ``detected_simd_level``, both decoded to str.
    """
    cdef:
        CRuntimeInfo c_info

    c_info = GetRuntimeInfo()

    active_level = frombytes(c_info.simd_level)
    detected_level = frombytes(c_info.detected_simd_level)
    return RuntimeInfo(simd_level=active_level,
                       detected_simd_level=detected_level)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def set_timezone_db_path(path):
    """
    Configure the path to text timezone database on Windows.

    Parameters
    ----------
    path : str
        Path to text timezone database. If None, the options are passed
        to the initialization call without a timezone database path set.
    """
    cdef CGlobalOptions options

    if path is not None:
        options.timezone_db_path = <c_string>tobytes(path)

    # Raises the matching Python exception if initialization fails.
    check_status(Initialize(options))
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cuda.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# flake8: noqa
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer,
|
| 22 |
+
HostBuffer, BufferReader, BufferWriter,
|
| 23 |
+
new_host_buffer,
|
| 24 |
+
serialize_record_batch, read_message,
|
| 25 |
+
read_record_batch)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/error.pxi
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetInterrupt
|
| 19 |
+
|
| 20 |
+
from pyarrow.includes.libarrow cimport CStatus
|
| 21 |
+
from pyarrow.includes.libarrow_python cimport IsPyError, RestorePyError
|
| 22 |
+
from pyarrow.includes.common cimport c_string
|
| 23 |
+
|
| 24 |
+
from contextlib import contextmanager
|
| 25 |
+
import os
|
| 26 |
+
import signal
|
| 27 |
+
import threading
|
| 28 |
+
|
| 29 |
+
from pyarrow.lib import is_threading_enabled
|
| 30 |
+
from pyarrow.util import _break_traceback_cycle_from_frame
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class ArrowException(Exception):
    """Common base class of the Arrow-specific exceptions defined here."""


class ArrowInvalid(ValueError, ArrowException):
    """Arrow error that is also a ValueError (status ``IsInvalid``)."""


class ArrowMemoryError(MemoryError, ArrowException):
    """Arrow error that is also a MemoryError (status ``IsOutOfMemory``)."""


class ArrowKeyError(KeyError, ArrowException):
    """Arrow error that is also a KeyError (status ``IsKeyError``)."""

    def __str__(self):
        # KeyError.__str__ would show the repr() of the key; use the plain
        # message formatting of ArrowException instead.
        return ArrowException.__str__(self)


class ArrowTypeError(TypeError, ArrowException):
    """Arrow error that is also a TypeError (status ``IsTypeError``)."""


class ArrowNotImplementedError(NotImplementedError, ArrowException):
    """Arrow error that is also a NotImplementedError."""


class ArrowCapacityError(ArrowException):
    """Arrow error for a status reporting ``IsCapacityError``."""


class ArrowIndexError(IndexError, ArrowException):
    """Arrow error that is also an IndexError (status ``IsIndexError``)."""


class ArrowSerializationError(ArrowException):
    """Arrow error for a status reporting ``IsSerializationError``."""


class ArrowCancelled(ArrowException):
    """Arrow error for a cancelled operation.

    Parameters
    ----------
    message : str
        Error message.
    signum : int, optional
        Number of the signal associated with the cancellation, stored
        on the instance as ``signum`` (None when not given).
    """

    def __init__(self, message, signum=None):
        super().__init__(message)
        self.signum = signum
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# Compatibility alias
|
| 78 |
+
ArrowIOError = IOError
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# check_status() and convert_status() could be written directly in C++
|
| 82 |
+
# if we didn't define Arrow-specific subclasses (ArrowInvalid etc.)
|
| 83 |
+
cdef int check_status(const CStatus& status) except -1 nogil:
    # Return 0 for an OK status; otherwise signal failure to the caller
    # with a Python exception set.
    if status.ok():
        return 0

    with gil:
        if not IsPyError(status):
            # Plain Arrow error: translate it to the matching exception.
            raise convert_status(status)

        # The status wraps a Python error: restore it and report failure.
        RestorePyError(status)
        return -1
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
cdef object convert_status(const CStatus& status):
    # Build (but do not raise) the Python exception object that
    # corresponds to a non-OK status.
    if IsPyError(status):
        try:
            RestorePyError(status)
        except BaseException as e:
            return e

    # We don't use Status::ToString() as it would redundantly include
    # the C++ class name.
    message = frombytes(status.message(), safe=True)
    detail = status.detail()
    if detail != nullptr:
        message += ". Detail: " + frombytes(detail.get().ToString(),
                                            safe=True)

    if status.IsInvalid():
        return ArrowInvalid(message)

    if status.IsIOError():
        # Note: OSError constructor is
        #   OSError(message)
        # or
        #   OSError(errno, message, filename=None)
        # or (on Windows)
        #   OSError(errno, message, filename, winerror)
        err_number = ErrnoFromStatus(status)
        win_err_number = WinErrorFromStatus(status)
        if win_err_number != 0:
            return IOError(err_number, message, None, win_err_number)
        if err_number != 0:
            return IOError(err_number, message)
        return IOError(message)

    if status.IsOutOfMemory():
        return ArrowMemoryError(message)
    if status.IsKeyError():
        return ArrowKeyError(message)
    if status.IsNotImplemented():
        return ArrowNotImplementedError(message)
    if status.IsTypeError():
        return ArrowTypeError(message)
    if status.IsCapacityError():
        return ArrowCapacityError(message)
    if status.IsIndexError():
        return ArrowIndexError(message)
    if status.IsSerializationError():
        return ArrowSerializationError(message)

    if status.IsCancelled():
        signum = SignalFromStatus(status)
        if signum > 0:
            return ArrowCancelled(message, signum)
        return ArrowCancelled(message)

    # Unrecognized status kind: fall back to the full string form.
    return ArrowException(frombytes(status.ToString(), safe=True))
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
# These are API functions for C++ PyArrow
|
| 153 |
+
cdef api int pyarrow_internal_check_status(
        const CStatus& status) except -1 nogil:
    # Exported entry point that simply delegates to check_status().
    return check_status(status)
|
| 156 |
+
|
| 157 |
+
cdef api object pyarrow_internal_convert_status(
        const CStatus& status):
    # Exported entry point that simply delegates to convert_status().
    return convert_status(status)
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
cdef class StopToken:
    # Python-visible holder of a C++ stop token.

    cdef void init(self, CStopToken stop_token):
        # Take ownership of the given token by moving it into the wrapper.
        self.stop_token = move(stop_token)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
cdef c_bool signal_handlers_enabled = True
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def enable_signal_handlers(c_bool enable):
    """
    Enable or disable interruption of long-running operations.

    By default, certain long running operations will detect user
    interruptions, such as by pressing Ctrl-C. This detection relies
    on setting a signal handler for the duration of the long-running
    operation, and may therefore interfere with other frameworks or
    libraries (such as an event loop).

    The setting is stored in a module-level flag.

    Parameters
    ----------
    enable : bool
        Whether to enable user interruption by setting a temporary
        signal handler.
    """
    global signal_handlers_enabled
    signal_handlers_enabled = enable
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
# For internal use
|
| 190 |
+
|
| 191 |
+
# Whether we need a workaround for https://bugs.python.org/issue42248
|
| 192 |
+
have_signal_refcycle = (sys.version_info < (3, 8, 10) or
|
| 193 |
+
(3, 9) <= sys.version_info < (3, 9, 5) or
|
| 194 |
+
sys.version_info[:2] == (3, 10))
|
| 195 |
+
|
| 196 |
+
cdef class SignalStopHandler:
    # Context manager that registers a cancelling signal handler on entry
    # and unregisters it on exit, exposing the associated StopToken.
    cdef:
        StopToken _stop_token
        vector[int] _signals
        c_bool _enabled

    def __cinit__(self):
        self._enabled = False

        self._init_signals()
        if have_signal_refcycle:
            _break_traceback_cycle_from_frame(sys._getframe(0))

        self._stop_token = StopToken()

        if self._signals.empty():
            # No signal to watch: the handler stays disabled.
            return

        maybe_source = SetSignalStopSource()
        if not maybe_source.ok():
            # See ARROW-11841 / ARROW-17173: in complex interaction
            # scenarios (such as R calling into Python), SetSignalStopSource()
            # may have already activated a signal-receiving StopSource.
            # Just warn instead of erroring out.
            maybe_source.status().Warn()
        else:
            self._stop_token.init(deref(maybe_source).token())
            # signals don't work on Emscripten without threads.
            # and possibly other single-thread environments.
            self._enabled = is_threading_enabled()

    def _init_signals(self):
        # Signals are only collected when handlers are enabled globally
        # and we are running on the main thread.
        if not signal_handlers_enabled:
            return
        if threading.current_thread() is not threading.main_thread():
            return
        # Keep SIGINT / SIGTERM only if a handler other than the default,
        # "ignore" or None is currently installed for them.
        self._signals = [
            sig for sig in (signal.SIGINT, signal.SIGTERM)
            if signal.getsignal(sig) not in (signal.SIG_DFL,
                                             signal.SIG_IGN, None)]

    def __enter__(self):
        if self._enabled:
            check_status(RegisterCancellingSignalHandler(self._signals))
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        if self._enabled:
            UnregisterCancellingSignalHandler()

        if exc_value is None:
            # Make sure we didn't lose a signal
            try:
                check_status(self._stop_token.stop_token.Poll())
            except ArrowCancelled as e:
                exc_value = e

        if isinstance(exc_value, ArrowCancelled):
            if not exc_value.signum:
                # Simulate Python receiving a SIGINT
                # (see https://bugs.python.org/issue43356 for why we can't
                # simulate the exact signal number)
                PyErr_SetInterrupt()
            elif os.name == 'nt':
                # Re-emit the exact same signal. We restored the Python
                # signal handler above, so it should receive it.
                SendSignal(exc_value.signum)
            else:
                # Same re-emission, targeted at the main thread.
                SendSignalToThread(exc_value.signum,
                                   threading.main_thread().ident)
            # Maximize chances of the Python signal handler being executed
            # now. Otherwise a potential KeyboardInterrupt might be missed
            # by an immediately enclosing try/except block.
            PyErr_CheckSignals()
            # ArrowCancelled will be re-raised if PyErr_CheckSignals()
            # returned successfully.

    def __dealloc__(self):
        if self._enabled:
            ResetSignalStopSource()

    @property
    def stop_token(self):
        return self._stop_token
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/fs.py
ADDED
|
@@ -0,0 +1,431 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
"""
|
| 19 |
+
FileSystem abstraction to interact with various local and remote filesystems.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from pyarrow.util import _is_path_like, _stringify_path
|
| 23 |
+
|
| 24 |
+
from pyarrow._fs import ( # noqa
|
| 25 |
+
FileSelector,
|
| 26 |
+
FileType,
|
| 27 |
+
FileInfo,
|
| 28 |
+
FileSystem,
|
| 29 |
+
LocalFileSystem,
|
| 30 |
+
SubTreeFileSystem,
|
| 31 |
+
_MockFileSystem,
|
| 32 |
+
FileSystemHandler,
|
| 33 |
+
PyFileSystem,
|
| 34 |
+
_copy_files,
|
| 35 |
+
_copy_files_selector,
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# For backward compatibility.
|
| 39 |
+
FileStats = FileInfo
|
| 40 |
+
|
| 41 |
+
_not_imported = []
|
| 42 |
+
try:
|
| 43 |
+
from pyarrow._azurefs import AzureFileSystem # noqa
|
| 44 |
+
except ImportError:
|
| 45 |
+
_not_imported.append("AzureFileSystem")
|
| 46 |
+
|
| 47 |
+
try:
|
| 48 |
+
from pyarrow._hdfs import HadoopFileSystem # noqa
|
| 49 |
+
except ImportError:
|
| 50 |
+
_not_imported.append("HadoopFileSystem")
|
| 51 |
+
|
| 52 |
+
try:
|
| 53 |
+
from pyarrow._gcsfs import GcsFileSystem # noqa
|
| 54 |
+
except ImportError:
|
| 55 |
+
_not_imported.append("GcsFileSystem")
|
| 56 |
+
|
| 57 |
+
try:
|
| 58 |
+
from pyarrow._s3fs import ( # noqa
|
| 59 |
+
AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy,
|
| 60 |
+
S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized,
|
| 61 |
+
finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region)
|
| 62 |
+
except ImportError:
|
| 63 |
+
_not_imported.append("S3FileSystem")
|
| 64 |
+
else:
|
| 65 |
+
# GH-38364: we don't initialize S3 eagerly as that could lead
|
| 66 |
+
# to crashes at shutdown even when S3 isn't used.
|
| 67 |
+
# Instead, S3 is initialized lazily using `ensure_s3_initialized`
|
| 68 |
+
# in assorted places.
|
| 69 |
+
import atexit
|
| 70 |
+
atexit.register(ensure_s3_finalized)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def __getattr__(name):
    """Module-level attribute hook for names not found in the module.

    Raises ImportError for names recorded in ``_not_imported`` (their
    import failed above), and AttributeError for any other name.
    """
    if name not in _not_imported:
        raise AttributeError(
            "module 'pyarrow.fs' has no attribute '{0}'".format(name)
        )

    raise ImportError(
        "The pyarrow installation is not built with support for "
        "'{0}'".format(name)
    )
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _filesystem_from_str(uri):
    """Instantiate a filesystem from a URI string.

    If the URI has a path component, it is treated as a path prefix: it
    must point to a directory, and the returned filesystem is a
    SubTreeFileSystem rooted at that prefix.

    Raises
    ------
    ValueError
        If the path component does not point to a directory.
    """
    base_fs, raw_prefix = FileSystem.from_uri(uri)
    prefix = base_fs.normalize_path(raw_prefix)
    if not prefix:
        # No path component: use the filesystem as-is.
        return base_fs

    # validate that the prefix is pointing to a directory
    prefix_info = base_fs.get_file_info([prefix])[0]
    if prefix_info.type != FileType.Directory:
        raise ValueError(
            "The path component of the filesystem URI must point to a "
            "directory but it has a type: `{}`. The path component "
            "is `{}` and the given filesystem URI is `{}`".format(
                prefix_info.type.name, prefix_info.path, uri
            )
        )
    return SubTreeFileSystem(prefix, base_fs)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _ensure_filesystem(filesystem, *, use_mmap=False):
    """Coerce `filesystem` into a pyarrow FileSystem instance.

    Accepted inputs are a FileSystem (returned unchanged), a URI string,
    or an fsspec filesystem when fsspec is importable.

    Raises
    ------
    ValueError
        If `use_mmap` is requested together with a URI string.
    TypeError
        If `filesystem` is none of the accepted kinds.
    """
    if isinstance(filesystem, FileSystem):
        return filesystem

    if isinstance(filesystem, str):
        if use_mmap:
            raise ValueError(
                "Specifying to use memory mapping not supported for "
                "filesystem specified as an URI string"
            )
        return _filesystem_from_str(filesystem)

    # handle fsspec-compatible filesystems (only if fsspec is installed)
    try:
        import fsspec
    except ImportError:
        fsspec = None

    if fsspec is not None and isinstance(filesystem,
                                         fsspec.AbstractFileSystem):
        if type(filesystem).__name__ == 'LocalFileSystem':
            # In case it's a simple LocalFileSystem, use native arrow one
            return LocalFileSystem(use_mmap=use_mmap)
        return PyFileSystem(FSSpecHandler(filesystem))

    raise TypeError(
        "Unrecognized filesystem: {}. `filesystem` argument must be a "
        "FileSystem instance or a valid file system URI'".format(
            type(filesystem))
    )
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def _resolve_filesystem_and_path(path, filesystem=None, *, memory_map=False):
    """
    Work out which filesystem a path belongs to.

    Parameters
    ----------
    path : str, path-like or file-like object
        A plain path, a URI, or an already opened file-like object.
    filesystem : FileSystem, str or fsspec filesystem, optional
        Filesystem to use. When omitted it is inferred from `path`.
    memory_map : bool, default False
        Passed on as ``use_mmap`` when a LocalFileSystem is created.

    Returns
    -------
    tuple
        ``(filesystem, path)``. ``filesystem`` is None when `path` is a
        file-like object.

    Raises
    ------
    ValueError
        If `filesystem` is given together with a file-like `path`, or if
        URI parsing fails for a reason other than a missing scheme or an
        unparsable URI.
    TypeError
        If `path` is a non-string path-like object and the filesystem is
        not a LocalFileSystem.
    """
    if not _is_path_like(path):
        # File-like object: there is nothing to open with a filesystem.
        if filesystem is None:
            return None, path
        raise ValueError(
            "'filesystem' passed but the specified path is file-like, so"
            " there is nothing to open with 'filesystem'."
        )

    if filesystem is not None:
        resolved_fs = _ensure_filesystem(filesystem, use_mmap=memory_map)
        if isinstance(resolved_fs, LocalFileSystem):
            path = _stringify_path(path)
        elif not isinstance(path, str):
            raise TypeError(
                "Expected string path; path-like objects are only allowed "
                "with a local filesystem"
            )
        return resolved_fs, resolved_fs.normalize_path(path)

    # No filesystem given: first try the path as a local file, and only
    # if it does not exist locally try to parse it as a URI.
    path = _stringify_path(path)
    local_fs = LocalFileSystem(use_mmap=memory_map)

    try:
        local_info = local_fs.get_file_info(path)
    except ValueError:
        # ValueError means the path is likely a URI.
        found_locally = False
    else:
        found_locally = local_info.type != FileType.NotFound

    if found_locally:
        return local_fs, path

    try:
        uri_fs, uri_path = FileSystem.from_uri(path)
    except ValueError as exc:
        reason = str(exc)
        if "empty scheme" in reason or "Cannot parse URI" in reason:
            # Neither a URI nor an existing local path: hand back the
            # local filesystem so the caller gets a plain file-not-found
            # error rather than a confusing scheme parsing error.
            return local_fs, path
        raise

    return uri_fs, uri_fs.normalize_path(uri_path)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def copy_files(source, destination,
               source_filesystem=None, destination_filesystem=None,
               *, chunk_size=1024*1024, use_threads=True):
    """
    Copy a file or a directory tree from one FileSystem to another.

    Directories are copied recursively, which makes it possible to
    transfer e.g. a whole S3 prefix to the local machine.

    Parameters
    ----------
    source : string
        Source file path or URI to a single file or directory.
        If a directory, files will be copied recursively from this path.
    destination : string
        Destination file path or URI. If `source` is a file, `destination`
        is also interpreted as the destination file (not directory).
        Directories will be created as necessary.
    source_filesystem : FileSystem, optional
        Source filesystem, needs to be specified if `source` is not a URI,
        otherwise inferred.
    destination_filesystem : FileSystem, optional
        Destination filesystem, needs to be specified if `destination` is
        not a URI, otherwise inferred.
    chunk_size : int, default 1MB
        The maximum size of block to read before flushing to the
        destination file. A larger chunk_size will use more memory while
        copying but may help accommodate high latency FileSystems.
    use_threads : bool, default True
        Whether to use multiple threads to accelerate copying.

    Examples
    --------
    Inspect an S3 bucket's files:

    >>> s3, path = fs.FileSystem.from_uri(
    ...            "s3://registry.opendata.aws/roda/ndjson/")
    >>> selector = fs.FileSelector(path)
    >>> s3.get_file_info(selector)
    [<FileInfo for 'registry.opendata.aws/roda/ndjson/index.ndjson':...]

    Copy one file from S3 bucket to a local directory:

    >>> fs.copy_files("s3://registry.opendata.aws/roda/ndjson/index.ndjson",
    ...               "file:///{}/index_copy.ndjson".format(local_path))

    >>> fs.LocalFileSystem().get_file_info(str(local_path)+
    ...                                    '/index_copy.ndjson')
    <FileInfo for '.../index_copy.ndjson': type=FileType.File, size=...>

    Copy file using a FileSystem object:

    >>> fs.copy_files("registry.opendata.aws/roda/ndjson/index.ndjson",
    ...               "file:///{}/index_copy.ndjson".format(local_path),
    ...               source_filesystem=fs.S3FileSystem())
    """
    src_fs, src_path = _resolve_filesystem_and_path(source, source_filesystem)
    dst_fs, dst_path = _resolve_filesystem_and_path(
        destination, destination_filesystem
    )

    source_is_directory = (
        src_fs.get_file_info(src_path).type == FileType.Directory
    )
    if not source_is_directory:
        # Single file: the destination is treated as a file path as well.
        _copy_files(src_fs, src_path, dst_fs, dst_path,
                    chunk_size, use_threads)
        return

    recursive_selector = FileSelector(src_path, recursive=True)
    _copy_files_selector(src_fs, recursive_selector, dst_fs, dst_path,
                         chunk_size, use_threads)
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
class FSSpecHandler(FileSystemHandler):
    """
    Handler for fsspec-based Python filesystems.

    https://filesystem-spec.readthedocs.io/en/latest/index.html

    Every method forwards to the wrapped fsspec filesystem, translating
    its results into ``FileInfo`` / ``PythonFile`` objects where needed.

    Parameters
    ----------
    fs : FSSpec-compliant filesystem instance

    Examples
    --------
    >>> PyFileSystem(FSSpecHandler(fsspec_fs)) # doctest: +SKIP
    """

    def __init__(self, fs):
        self.fs = fs

    def __eq__(self, other):
        # Two handlers are equal when the wrapped filesystems are equal.
        if isinstance(other, FSSpecHandler):
            return self.fs == other.fs
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, FSSpecHandler):
            return self.fs != other.fs
        return NotImplemented

    def get_type_name(self):
        """Return ``"fsspec+<protocol>"`` for the wrapped filesystem."""
        protocol = self.fs.protocol
        # When the protocol is given as a list, the first entry is used.
        if isinstance(protocol, list):
            protocol = protocol[0]
        return "fsspec+{0}".format(protocol)

    def normalize_path(self, path):
        """Return `path` unchanged; no normalization is applied."""
        return path

    @staticmethod
    def _create_file_info(path, info):
        """
        Build a FileInfo for `path` from an fsspec info dictionary.

        `info` must provide the "size" and "type" keys; "mtime" is optional.
        """
        size = info["size"]
        if info["type"] == "file":
            ftype = FileType.File
        elif info["type"] == "directory":
            ftype = FileType.Directory
            # some fsspec filesystems include a file size for directories
            size = None
        else:
            ftype = FileType.Unknown
        return FileInfo(path, ftype, size=size, mtime=info.get("mtime", None))

    def get_file_info(self, paths):
        """
        Return one FileInfo per entry of `paths`.

        Paths for which the filesystem raises FileNotFoundError are
        reported with ``FileType.NotFound`` instead of raising.
        """
        infos = []
        for path in paths:
            try:
                info = self.fs.info(path)
            except FileNotFoundError:
                infos.append(FileInfo(path, FileType.NotFound))
            else:
                infos.append(self._create_file_info(path, info))
        return infos

    def get_file_info_selector(self, selector):
        """
        Return FileInfo objects for the entries below ``selector.base_dir``.

        Raises
        ------
        NotADirectoryError
            If the base directory exists but is not a directory.
        FileNotFoundError
            If the base directory does not exist and
            ``selector.allow_not_found`` is false.
        """
        if not self.fs.isdir(selector.base_dir):
            if self.fs.exists(selector.base_dir):
                raise NotADirectoryError(selector.base_dir)
            if selector.allow_not_found:
                return []
            raise FileNotFoundError(selector.base_dir)

        maxdepth = None if selector.recursive else 1
        selected_files = self.fs.find(
            selector.base_dir, maxdepth=maxdepth, withdirs=True, detail=True
        )

        # Need to exclude base directory from selected files if present
        # (fsspec filesystems, see GH-37555). The normalized base directory
        # does not change between entries, so it is computed once.
        base_dir = selector.base_dir.strip("/")
        return [
            self._create_file_info(path, info)
            for path, info in selected_files.items()
            if path.strip("/") != base_dir
        ]

    def create_dir(self, path, recursive):
        """Create directory `path`; an already existing one is accepted."""
        # mkdir also raises FileNotFoundError when base directory is not found
        try:
            self.fs.mkdir(path, create_parents=recursive)
        except FileExistsError:
            pass

    def delete_dir(self, path):
        """Remove directory `path` together with its contents."""
        self.fs.rm(path, recursive=True)

    def _delete_dir_contents(self, path, missing_dir_ok):
        """
        Remove everything inside `path` but keep `path` itself.

        A missing `path` is silently accepted when `missing_dir_ok` is
        true; otherwise the FileNotFoundError propagates.
        """
        try:
            subpaths = self.fs.listdir(path, detail=False)
        except FileNotFoundError:
            if missing_dir_ok:
                return
            raise
        for subpath in subpaths:
            if self.fs.isdir(subpath):
                self.fs.rm(subpath, recursive=True)
            elif self.fs.isfile(subpath):
                self.fs.rm(subpath)

    def delete_dir_contents(self, path, missing_dir_ok):
        """
        Remove the contents of `path`.

        Raises
        ------
        ValueError
            If `path` designates the root directory (empty once slashes
            are stripped); use ``delete_root_dir_contents`` for that.
        """
        if path.strip("/") == "":
            # Format the message instead of passing three separate
            # arguments, which rendered as a tuple.
            raise ValueError(
                "delete_dir_contents called on path '{}'".format(path))
        self._delete_dir_contents(path, missing_dir_ok)

    def delete_root_dir_contents(self):
        """Remove everything below the filesystem root."""
        # `missing_dir_ok` is a required parameter of the helper; omitting
        # it made this method fail with a TypeError on every call.
        self._delete_dir_contents("/", missing_dir_ok=False)

    def delete_file(self, path):
        """Delete the file at `path`, raising FileNotFoundError if absent."""
        # fs.rm correctly raises IsADirectoryError when `path` is a directory
        # instead of a file and `recursive` is not set to True
        if not self.fs.exists(path):
            raise FileNotFoundError(path)
        self.fs.rm(path)

    def move(self, src, dest):
        """Move `src` to `dest`, recursively."""
        self.fs.mv(src, dest, recursive=True)

    def copy_file(self, src, dest):
        """Copy the file `src` to `dest`."""
        # fs.copy correctly raises IsADirectoryError when `src` is a directory
        # instead of a file
        self.fs.copy(src, dest)

    # TODO can we read/pass metadata (e.g. Content-Type) in the methods below?

    def open_input_stream(self, path):
        """Open `path` for reading; FileNotFoundError if not a file."""
        from pyarrow import PythonFile

        if not self.fs.isfile(path):
            raise FileNotFoundError(path)

        return PythonFile(self.fs.open(path, mode="rb"), mode="r")

    def open_input_file(self, path):
        """Open `path` for reading; FileNotFoundError if not a file."""
        from pyarrow import PythonFile

        if not self.fs.isfile(path):
            raise FileNotFoundError(path)

        return PythonFile(self.fs.open(path, mode="rb"), mode="r")

    def open_output_stream(self, path, metadata):
        """Open `path` for writing in "wb" mode; `metadata` is not used."""
        from pyarrow import PythonFile

        return PythonFile(self.fs.open(path, mode="wb"), mode="w")

    def open_append_stream(self, path, metadata):
        """Open `path` for appending in "ab" mode; `metadata` is not used."""
        from pyarrow import PythonFile

        return PythonFile(self.fs.open(path, mode="ab"), mode="w")
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.pxi
ADDED
|
@@ -0,0 +1,1403 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
from cpython.pycapsule cimport PyCapsule_CheckExact, PyCapsule_GetPointer, PyCapsule_New
|
| 19 |
+
|
| 20 |
+
from collections import namedtuple
|
| 21 |
+
import warnings
|
| 22 |
+
from cython import sizeof
|
| 23 |
+
|
| 24 |
+
# IPC metadata versions; each member carries the value of the
# corresponding CMetadataVersion constant, cast to char.
cpdef enum MetadataVersion:
    V1 = <char> CMetadataVersion_V1
    V2 = <char> CMetadataVersion_V2
    V3 = <char> CMetadataVersion_V3
    V4 = <char> CMetadataVersion_V4
    V5 = <char> CMetadataVersion_V5
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
cdef object _wrap_metadata_version(CMetadataVersion version):
    # Convert a C-level metadata version into its MetadataVersion member.
    cdef char raw_version = <char> version
    return MetadataVersion(raw_version)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
cdef CMetadataVersion _unwrap_metadata_version(
        MetadataVersion version) except *:
    # Convert a MetadataVersion member back into the matching C constant.
    # Any value that matches no member raises ValueError.
    if version == MetadataVersion.V1:
        return CMetadataVersion_V1
    if version == MetadataVersion.V2:
        return CMetadataVersion_V2
    if version == MetadataVersion.V3:
        return CMetadataVersion_V3
    if version == MetadataVersion.V4:
        return CMetadataVersion_V4
    if version == MetadataVersion.V5:
        return CMetadataVersion_V5
    raise ValueError("Not a metadata version: " + repr(version))
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
_WriteStats = namedtuple(
    'WriteStats',
    (
        'num_messages',
        'num_record_batches',
        'num_dictionary_batches',
        'num_dictionary_deltas',
        'num_replaced_dictionaries',
    ))


class WriteStats(_WriteStats):
    """IPC write statistics

    Parameters
    ----------
    num_messages : int
        Number of messages.
    num_record_batches : int
        Number of record batches.
    num_dictionary_batches : int
        Number of dictionary batches.
    num_dictionary_deltas : int
        Delta of dictionaries.
    num_replaced_dictionaries : int
        Number of replaced dictionaries.
    """
    # No per-instance __dict__: the namedtuple fields are the only state.
    __slots__ = ()
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
@staticmethod
cdef _wrap_write_stats(CIpcWriteStats c):
    # Copy the counters of the C struct into a Python WriteStats tuple.
    return WriteStats(
        num_messages=c.num_messages,
        num_record_batches=c.num_record_batches,
        num_dictionary_batches=c.num_dictionary_batches,
        num_dictionary_deltas=c.num_dictionary_deltas,
        num_replaced_dictionaries=c.num_replaced_dictionaries)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
_ReadStats = namedtuple(
    'ReadStats',
    (
        'num_messages',
        'num_record_batches',
        'num_dictionary_batches',
        'num_dictionary_deltas',
        'num_replaced_dictionaries',
    ))


class ReadStats(_ReadStats):
    """IPC read statistics

    Parameters
    ----------
    num_messages : int
        Number of messages.
    num_record_batches : int
        Number of record batches.
    num_dictionary_batches : int
        Number of dictionary batches.
    num_dictionary_deltas : int
        Delta of dictionaries.
    num_replaced_dictionaries : int
        Number of replaced dictionaries.
    """
    # No per-instance __dict__: the namedtuple fields are the only state.
    __slots__ = ()
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@staticmethod
cdef _wrap_read_stats(CIpcReadStats c):
    # Copy the counters of the C struct into a Python ReadStats tuple.
    return ReadStats(
        num_messages=c.num_messages,
        num_record_batches=c.num_record_batches,
        num_dictionary_batches=c.num_dictionary_batches,
        num_dictionary_deltas=c.num_dictionary_deltas,
        num_replaced_dictionaries=c.num_replaced_dictionaries)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
cdef class IpcReadOptions(_Weakrefable):
    """
    Serialization options for reading IPC format.

    Parameters
    ----------
    ensure_native_endian : bool, default True
        Whether to convert incoming data to platform-native endianness.
    use_threads : bool
        Whether to use the global CPU thread pool to parallelize any
        computational tasks like decompression
    included_fields : list
        If empty (the default), return all deserialized fields.
        If non-empty, the values are the indices of fields to read on
        the top-level schema
    """
    __slots__ = ()

    # cdef block is in lib.pxd

    def __init__(self, *, bint ensure_native_endian=True,
                 bint use_threads=True, list included_fields=None):
        # Start from the library defaults, then apply each argument
        # through its property setter.
        self.c_options = CIpcReadOptions.Defaults()
        self.ensure_native_endian = ensure_native_endian
        self.use_threads = use_threads
        # None leaves the default field selection untouched.
        if included_fields is not None:
            self.included_fields = included_fields

    @property
    def ensure_native_endian(self):
        return self.c_options.ensure_native_endian

    @ensure_native_endian.setter
    def ensure_native_endian(self, bint enabled):
        self.c_options.ensure_native_endian = enabled

    @property
    def use_threads(self):
        return self.c_options.use_threads

    @use_threads.setter
    def use_threads(self, bint enabled):
        self.c_options.use_threads = enabled

    @property
    def included_fields(self):
        return self.c_options.included_fields

    @included_fields.setter
    def included_fields(self, list indices not None):
        self.c_options.included_fields = indices
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
cdef class IpcWriteOptions(_Weakrefable):
|
| 169 |
+
"""
|
| 170 |
+
Serialization options for the IPC format.
|
| 171 |
+
|
| 172 |
+
Parameters
|
| 173 |
+
----------
|
| 174 |
+
metadata_version : MetadataVersion, default MetadataVersion.V5
|
| 175 |
+
The metadata version to write. V5 is the current and latest,
|
| 176 |
+
V4 is the pre-1.0 metadata version (with incompatible Union layout).
|
| 177 |
+
allow_64bit : bool, default False
|
| 178 |
+
If true, allow field lengths that don't fit in a signed 32-bit int.
|
| 179 |
+
use_legacy_format : bool, default False
|
| 180 |
+
Whether to use the pre-Arrow 0.15 IPC format.
|
| 181 |
+
compression : str, Codec, or None
|
| 182 |
+
compression codec to use for record batch buffers.
|
| 183 |
+
If None then batch buffers will be uncompressed.
|
| 184 |
+
Must be "lz4", "zstd" or None.
|
| 185 |
+
To specify a compression_level use `pyarrow.Codec`
|
| 186 |
+
use_threads : bool
|
| 187 |
+
Whether to use the global CPU thread pool to parallelize any
|
| 188 |
+
computational tasks like compression.
|
| 189 |
+
emit_dictionary_deltas : bool
|
| 190 |
+
Whether to emit dictionary deltas. Default is false for maximum
|
| 191 |
+
stream compatibility.
|
| 192 |
+
unify_dictionaries : bool
|
| 193 |
+
If true then calls to write_table will attempt to unify dictionaries
|
| 194 |
+
across all batches in the table. This can help avoid the need for
|
| 195 |
+
replacement dictionaries (which the file format does not support)
|
| 196 |
+
but requires computing the unified dictionary and then remapping
|
| 197 |
+
the indices arrays.
|
| 198 |
+
|
| 199 |
+
This parameter is ignored when writing to the IPC stream format as
|
| 200 |
+
the IPC stream format can support replacement dictionaries.
|
| 201 |
+
"""
|
| 202 |
+
__slots__ = ()
|
| 203 |
+
|
| 204 |
+
# cdef block is in lib.pxd
|
| 205 |
+
|
| 206 |
+
def __init__(self, *, metadata_version=MetadataVersion.V5,
|
| 207 |
+
bint allow_64bit=False, use_legacy_format=False,
|
| 208 |
+
compression=None, bint use_threads=True,
|
| 209 |
+
bint emit_dictionary_deltas=False,
|
| 210 |
+
bint unify_dictionaries=False):
|
| 211 |
+
self.c_options = CIpcWriteOptions.Defaults()
|
| 212 |
+
self.allow_64bit = allow_64bit
|
| 213 |
+
self.use_legacy_format = use_legacy_format
|
| 214 |
+
self.metadata_version = metadata_version
|
| 215 |
+
if compression is not None:
|
| 216 |
+
self.compression = compression
|
| 217 |
+
self.use_threads = use_threads
|
| 218 |
+
self.emit_dictionary_deltas = emit_dictionary_deltas
|
| 219 |
+
self.unify_dictionaries = unify_dictionaries
|
| 220 |
+
|
| 221 |
+
@property
|
| 222 |
+
def allow_64bit(self):
|
| 223 |
+
return self.c_options.allow_64bit
|
| 224 |
+
|
| 225 |
+
@allow_64bit.setter
|
| 226 |
+
def allow_64bit(self, bint value):
|
| 227 |
+
self.c_options.allow_64bit = value
|
| 228 |
+
|
| 229 |
+
@property
|
| 230 |
+
def use_legacy_format(self):
|
| 231 |
+
return self.c_options.write_legacy_ipc_format
|
| 232 |
+
|
| 233 |
+
@use_legacy_format.setter
|
| 234 |
+
def use_legacy_format(self, bint value):
|
| 235 |
+
self.c_options.write_legacy_ipc_format = value
|
| 236 |
+
|
| 237 |
+
@property
|
| 238 |
+
def metadata_version(self):
|
| 239 |
+
return _wrap_metadata_version(self.c_options.metadata_version)
|
| 240 |
+
|
| 241 |
+
@metadata_version.setter
|
| 242 |
+
def metadata_version(self, value):
|
| 243 |
+
self.c_options.metadata_version = _unwrap_metadata_version(value)
|
| 244 |
+
|
| 245 |
+
@property
|
| 246 |
+
def compression(self):
|
| 247 |
+
if self.c_options.codec == nullptr:
|
| 248 |
+
return None
|
| 249 |
+
else:
|
| 250 |
+
return frombytes(self.c_options.codec.get().name())
|
| 251 |
+
|
| 252 |
+
@compression.setter
|
| 253 |
+
def compression(self, value):
|
| 254 |
+
if value is None:
|
| 255 |
+
self.c_options.codec.reset()
|
| 256 |
+
elif isinstance(value, str):
|
| 257 |
+
codec_type = _ensure_compression(value)
|
| 258 |
+
if codec_type != CCompressionType_ZSTD and codec_type != CCompressionType_LZ4_FRAME:
|
| 259 |
+
raise ValueError("Compression type must be lz4, zstd or None")
|
| 260 |
+
self.c_options.codec = shared_ptr[CCodec](GetResultValue(
|
| 261 |
+
CCodec.Create(codec_type)).release())
|
| 262 |
+
elif isinstance(value, Codec):
|
| 263 |
+
if value.name != "lz4" and value.name != "zstd":
|
| 264 |
+
raise ValueError("Compression type must be lz4, zstd or None")
|
| 265 |
+
self.c_options.codec = (<Codec>value).wrapped
|
| 266 |
+
else:
|
| 267 |
+
raise TypeError(
|
| 268 |
+
"Property `compression` must be None, str, or pyarrow.Codec")
|
| 269 |
+
|
| 270 |
+
@property
|
| 271 |
+
def use_threads(self):
|
| 272 |
+
return self.c_options.use_threads
|
| 273 |
+
|
| 274 |
+
@use_threads.setter
|
| 275 |
+
def use_threads(self, bint value):
|
| 276 |
+
self.c_options.use_threads = value
|
| 277 |
+
|
| 278 |
+
@property
|
| 279 |
+
def emit_dictionary_deltas(self):
|
| 280 |
+
return self.c_options.emit_dictionary_deltas
|
| 281 |
+
|
| 282 |
+
@emit_dictionary_deltas.setter
|
| 283 |
+
def emit_dictionary_deltas(self, bint value):
|
| 284 |
+
self.c_options.emit_dictionary_deltas = value
|
| 285 |
+
|
| 286 |
+
@property
def unify_dictionaries(self):
    """
    Value of the ``unify_dictionaries`` flag on the wrapped C write options.
    """
    return self.c_options.unify_dictionaries

@unify_dictionaries.setter
def unify_dictionaries(self, bint value):
    # ``value`` is coerced to a C boolean by the ``bint`` argument type.
    self.c_options.unify_dictionaries = value
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
cdef class Message(_Weakrefable):
    """
    Container for an Arrow IPC message with metadata and optional body.

    Instances are not meant to be built through the constructor; calling
    it raises ``TypeError``.
    """

    def __cinit__(self):
        pass

    def __init__(self):
        cls_name = self.__class__.__name__
        raise TypeError(f"Do not call {cls_name}'s constructor directly, use "
                        "`pyarrow.ipc.read_message` function instead.")

    @property
    def type(self):
        """Message type, formatted as a string."""
        return frombytes(FormatMessageType(self.message.get().type()))

    @property
    def metadata(self):
        """Metadata of the message, wrapped as a pyarrow Buffer."""
        return pyarrow_wrap_buffer(self.message.get().metadata())

    @property
    def metadata_version(self):
        """Metadata version reported by the underlying message."""
        return _wrap_metadata_version(self.message.get().metadata_version())

    @property
    def body(self):
        """Body of the message as a Buffer, or None if there is no body."""
        cdef shared_ptr[CBuffer] c_body = self.message.get().body()
        if c_body.get() == NULL:
            return None
        return pyarrow_wrap_buffer(c_body)

    def equals(self, Message other):
        """
        Returns True if the message contents (metadata and body) are identical

        Parameters
        ----------
        other : Message

        Returns
        -------
        are_equal : bool
        """
        cdef c_bool are_equal
        with nogil:
            are_equal = self.message.get().Equals(deref(other.message.get()))
        return are_equal

    def serialize_to(self, NativeFile sink, alignment=8, memory_pool=None):
        """
        Write message to generic OutputStream

        Parameters
        ----------
        sink : NativeFile
        alignment : int, default 8
            Byte alignment for metadata and body
        memory_pool : MemoryPool, default None
            Uses default memory pool if not specified
        """
        cdef:
            int64_t written = 0
            COutputStream* c_out
            CIpcWriteOptions c_options

        # NOTE(review): `memory_pool` is accepted but never read in this
        # method body; only `alignment` is copied into the write options.
        c_options.alignment = alignment
        c_out = sink.get_output_stream().get()
        with nogil:
            check_status(
                self.message.get().SerializeTo(c_out, c_options, &written))

    def serialize(self, alignment=8, memory_pool=None):
        """
        Write message as encapsulated IPC message

        Parameters
        ----------
        alignment : int, default 8
            Byte alignment for metadata and body
        memory_pool : MemoryPool, default None
            Uses default memory pool if not specified

        Returns
        -------
        serialized : Buffer
        """
        # Serialize into an in-memory stream and hand back its contents.
        sink = BufferOutputStream(memory_pool)
        self.serialize_to(sink, alignment=alignment, memory_pool=memory_pool)
        return sink.getvalue()

    def __repr__(self):
        if self.message == nullptr:
            return "pyarrow.Message(uninitialized)"

        metadata_len = self.metadata.size
        body = self.body
        body_len = body.size if body is not None else 0

        return "\n".join([
            "pyarrow.Message",
            f"type: {self.type}",
            f"metadata length: {metadata_len}",
            f"body length: {body_len}",
        ])
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
cdef class MessageReader(_Weakrefable):
    """
    Interface for reading Message objects from some source (like an
    InputStream).

    Use :meth:`open_stream` to create an instance; calling the constructor
    raises ``TypeError``.
    """
    cdef:
        unique_ptr[CMessageReader] reader

    def __cinit__(self):
        pass

    def __init__(self):
        cls_name = self.__class__.__name__
        raise TypeError(f"Do not call {cls_name}'s constructor directly, use "
                        "`pyarrow.ipc.MessageReader.open_stream` function "
                        "instead.")

    @staticmethod
    def open_stream(source):
        """
        Open stream from source, if you want to use memory map use
        MemoryMappedFile as source.

        Parameters
        ----------
        source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
            A readable source, like an InputStream
        """
        cdef:
            MessageReader opened = MessageReader.__new__(MessageReader)
            shared_ptr[CInputStream] c_stream
            unique_ptr[CMessageReader] c_reader

        _get_input_stream(source, &c_stream)
        with nogil:
            c_reader = CMessageReader.Open(c_stream)
            # Transfer ownership of the C++ reader to the Python wrapper.
            opened.reader.reset(c_reader.release())

        return opened

    def __iter__(self):
        return self

    def __next__(self):
        return self.read_next_message()

    def read_next_message(self):
        """
        Read next Message from the stream.

        Raises
        ------
        StopIteration
            At end of stream
        """
        cdef Message msg = Message.__new__(Message)

        with nogil:
            msg.message = move(
                GetResultValue(self.reader.get().ReadNextMessage()))

        # A null message pointer marks the end of the stream.
        if msg.message.get() == NULL:
            raise StopIteration

        return msg
|
| 465 |
+
|
| 466 |
+
# ----------------------------------------------------------------------
|
| 467 |
+
# File and stream readers and writers
|
| 468 |
+
|
| 469 |
+
cdef class _CRecordBatchWriter(_Weakrefable):
    """The base RecordBatchWriter wrapper.

    Provides common implementations of convenience methods. Should not
    be instantiated directly by user code.
    """

    # cdef block is in lib.pxd

    def write(self, table_or_batch):
        """
        Write RecordBatch or Table to stream.

        Dispatches to :meth:`write_batch` or :meth:`write_table` depending
        on the argument type; any other type raises ``ValueError``.

        Parameters
        ----------
        table_or_batch : {RecordBatch, Table}
        """
        if isinstance(table_or_batch, RecordBatch):
            self.write_batch(table_or_batch)
            return
        if isinstance(table_or_batch, Table):
            self.write_table(table_or_batch)
            return
        raise ValueError(type(table_or_batch))

    def write_batch(self, RecordBatch batch, custom_metadata=None):
        """
        Write RecordBatch to stream.

        Parameters
        ----------
        batch : RecordBatch
        custom_metadata : mapping or KeyValueMetadata
            Keys and values must be string-like / coercible to bytes
        """
        c_metadata = pyarrow_unwrap_metadata(
            ensure_metadata(custom_metadata, allow_none=True))

        with nogil:
            check_status(
                self.writer.get().WriteRecordBatch(deref(batch.batch),
                                                   c_metadata))

    def write_table(self, Table table, max_chunksize=None):
        """
        Write Table to stream in (contiguous) RecordBatch objects.

        Parameters
        ----------
        table : Table
        max_chunksize : int, default None
            Maximum number of rows for RecordBatch chunks. Individual chunks may
            be smaller depending on the chunk layout of individual columns.
        """
        cdef:
            # max_chunksize must be > 0 to have any impact
            int64_t c_chunksize = -1

        if max_chunksize is not None:
            c_chunksize = max_chunksize

        with nogil:
            check_status(
                self.writer.get().WriteTable(deref(table.table), c_chunksize))

    def close(self):
        """
        Close stream and write end-of-stream 0 marker.
        """
        with nogil:
            check_status(self.writer.get().Close())

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # The writer is closed on exit whether or not an exception occurred.
        self.close()

    @property
    def stats(self):
        """
        Current IPC write statistics.
        """
        if not self.writer:
            raise ValueError("Operation on closed writer")
        return _wrap_write_stats(self.writer.get().stats())
|
| 553 |
+
|
| 554 |
+
|
| 555 |
+
cdef class _RecordBatchStreamWriter(_CRecordBatchWriter):
    """
    Writer wrapper whose underlying C++ writer is created with
    ``MakeStreamWriter`` in :meth:`_open`.
    """
    cdef:
        CIpcWriteOptions options
        bint closed

    def __cinit__(self):
        pass

    def __dealloc__(self):
        pass

    @property
    def _use_legacy_format(self):
        # For testing (see test_ipc.py)
        return self.options.write_legacy_ipc_format

    @property
    def _metadata_version(self):
        # For testing (see test_ipc.py)
        return _wrap_metadata_version(self.options.metadata_version)

    def _open(self, sink, Schema schema not None,
              IpcWriteOptions options=IpcWriteOptions()):
        """
        Create the underlying stream writer for ``sink``.

        Parameters
        ----------
        sink : object
            Destination passed to ``get_writer`` to obtain the output stream.
        schema : Schema
            Schema handed to the stream writer.
        options : IpcWriteOptions
            Write options; their C representation is copied onto this object.
        """
        cdef shared_ptr[COutputStream] c_sink

        # Keep a copy of the C options so the testing properties can read them.
        self.options = options.c_options
        get_writer(sink, &c_sink)
        with nogil:
            self.writer = GetResultValue(
                MakeStreamWriter(c_sink, schema.sp_schema, self.options))
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
cdef _get_input_stream(object source, shared_ptr[CInputStream]* out):
    # Try to view `source` as a buffer first; anything that `as_buffer`
    # rejects with TypeError is passed through unchanged.
    try:
        candidate = as_buffer(source)
    except TypeError:
        # Non-buffer-like
        candidate = source

    get_input_stream(candidate, True, out)
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
class _ReadPandasMixin:
    """Mixin adding ``read_pandas`` to objects that provide ``read_all``."""

    def read_pandas(self, **options):
        """
        Read contents of stream to a pandas.DataFrame.

        Read all record batches as a pyarrow.Table then convert it to a
        pandas.DataFrame using Table.to_pandas.

        Parameters
        ----------
        **options
            Arguments to forward to :meth:`Table.to_pandas`.

        Returns
        -------
        df : pandas.DataFrame
        """
        return self.read_all().to_pandas(**options)
|
| 619 |
+
|
| 620 |
+
|
| 621 |
+
cdef class RecordBatchReader(_Weakrefable):
    """Base class for reading stream of record batches.

    Record batch readers function as iterators of record batches that also
    provide the schema (without the need to get any batches).

    Warnings
    --------
    Do not call this class's constructor directly, use one of the
    ``RecordBatchReader.from_*`` functions instead.

    Notes
    -----
    To import and export using the Arrow C stream interface, use the
    ``_import_from_c`` and ``_export_to_c`` methods. However, keep in mind this
    interface is intended for expert users.

    Examples
    --------
    >>> import pyarrow as pa
    >>> schema = pa.schema([('x', pa.int64())])
    >>> def iter_record_batches():
    ...     for i in range(2):
    ...         yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema)
    >>> reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches())
    >>> print(reader.schema)
    x: int64
    >>> for batch in reader:
    ...     print(batch)
    pyarrow.RecordBatch
    x: int64
    ----
    x: [1,2,3]
    pyarrow.RecordBatch
    x: int64
    ----
    x: [1,2,3]
    """

    # cdef block is in lib.pxd

    def __init__(self):
        cls_name = self.__class__.__name__
        raise TypeError(
            f"Do not call {cls_name}'s constructor directly, "
            "use one of the RecordBatchReader.from_* functions instead.")

    def __iter__(self):
        return self

    def __next__(self):
        return self.read_next_batch()

    @property
    def schema(self):
        """
        Shared schema of the record batches in the stream.

        Returns
        -------
        Schema
        """
        cdef shared_ptr[CSchema] sp_schema

        with nogil:
            sp_schema = self.reader.get().schema()

        return pyarrow_wrap_schema(sp_schema)

    def read_next_batch(self):
        """
        Read next RecordBatch from the stream.

        Raises
        ------
        StopIteration:
            At end of stream.

        Returns
        -------
        RecordBatch
        """
        cdef shared_ptr[CRecordBatch] c_batch

        with nogil:
            check_status(self.reader.get().ReadNext(&c_batch))

        # A null batch pointer marks the end of the stream.
        if c_batch.get() == NULL:
            raise StopIteration

        return pyarrow_wrap_batch(c_batch)

    def read_next_batch_with_custom_metadata(self):
        """
        Read next RecordBatch from the stream along with its custom metadata.

        Raises
        ------
        StopIteration:
            At end of stream.

        Returns
        -------
        batch : RecordBatch
        custom_metadata : KeyValueMetadata
        """
        cdef CRecordBatchWithMetadata c_item

        with nogil:
            c_item = GetResultValue(self.reader.get().ReadNext())

        # A null batch pointer marks the end of the stream.
        if c_item.batch.get() == NULL:
            raise StopIteration

        return _wrap_record_batch_with_metadata(c_item)

    def iter_batches_with_custom_metadata(self):
        """
        Iterate over record batches from the stream along with their custom
        metadata.

        Yields
        ------
        RecordBatchWithMetadata
        """
        while True:
            try:
                yield self.read_next_batch_with_custom_metadata()
            except StopIteration:
                # End of stream: finish the generator cleanly.
                return

    def read_all(self):
        """
        Read all record batches as a pyarrow.Table.

        Returns
        -------
        Table
        """
        cdef shared_ptr[CTable] c_table
        with nogil:
            check_status(self.reader.get().ToTable().Value(&c_table))
        return pyarrow_wrap_table(c_table)

    read_pandas = _ReadPandasMixin.read_pandas

    def close(self):
        """
        Release any resources associated with the reader.
        """
        with nogil:
            check_status(self.reader.get().Close())

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # The reader is closed on exit whether or not an exception occurred.
        self.close()

    def cast(self, target_schema):
        """
        Wrap this reader with one that casts each batch lazily as it is pulled.
        Currently only a safe cast to target_schema is implemented.

        Parameters
        ----------
        target_schema : Schema
            Schema to cast to, the names and order of fields must match.

        Returns
        -------
        RecordBatchReader

        Raises
        ------
        ValueError
            If the field names of ``target_schema`` differ from this
            reader's field names.
        """
        cdef:
            shared_ptr[CSchema] c_target
            shared_ptr[CRecordBatchReader] c_casting
            RecordBatchReader wrapped

        if self.schema.names != target_schema.names:
            raise ValueError("Target schema's field names are not matching "
                             f"the table's field names: {self.schema.names}, "
                             f"{target_schema.names}")

        c_target = pyarrow_unwrap_schema(target_schema)
        c_casting = GetResultValue(
            CCastingRecordBatchReader.Make(self.reader, c_target))

        wrapped = RecordBatchReader.__new__(RecordBatchReader)
        wrapped.reader = c_casting
        return wrapped

    def _export_to_c(self, out_ptr):
        """
        Export to a C ArrowArrayStream struct, given its pointer.

        Parameters
        ----------
        out_ptr: int
            The raw pointer to a C ArrowArrayStream struct.

        Be careful: if you don't pass the ArrowArrayStream struct to a
        consumer, array memory will leak.  This is a low-level function
        intended for expert users.
        """
        cdef void* c_ptr = _as_c_pointer(out_ptr)
        with nogil:
            check_status(ExportRecordBatchReader(
                self.reader, <ArrowArrayStream*> c_ptr))

    @staticmethod
    def _import_from_c(in_ptr):
        """
        Import RecordBatchReader from a C ArrowArrayStream struct,
        given its pointer.

        Parameters
        ----------
        in_ptr: int
            The raw pointer to a C ArrowArrayStream struct.

        This is a low-level function intended for expert users.
        """
        cdef:
            void* c_ptr = _as_c_pointer(in_ptr)
            shared_ptr[CRecordBatchReader] c_reader
            RecordBatchReader imported

        with nogil:
            c_reader = GetResultValue(ImportRecordBatchReader(
                <ArrowArrayStream*> c_ptr))

        imported = RecordBatchReader.__new__(RecordBatchReader)
        imported.reader = c_reader
        return imported

    def __arrow_c_stream__(self, requested_schema=None):
        """
        Export to a C ArrowArrayStream PyCapsule.

        Parameters
        ----------
        requested_schema : PyCapsule, default None
            The schema to which the stream should be casted, passed as a
            PyCapsule containing a C ArrowSchema representation of the
            requested schema.

        Returns
        -------
        PyCapsule
            A capsule containing a C ArrowArrayStream struct.
        """
        cdef ArrowArrayStream* c_stream

        if requested_schema is not None:
            out_schema = Schema._import_from_c_capsule(requested_schema)
            if self.schema != out_schema:
                # Delegate to a casting reader when the schemas differ.
                return self.cast(out_schema).__arrow_c_stream__()

        stream_capsule = alloc_c_stream(&c_stream)

        with nogil:
            check_status(ExportRecordBatchReader(self.reader, c_stream))

        return stream_capsule

    @staticmethod
    def _import_from_c_capsule(stream):
        """
        Import RecordBatchReader from a C ArrowArrayStream PyCapsule.

        Parameters
        ----------
        stream: PyCapsule
            A capsule containing a C ArrowArrayStream struct.

        Returns
        -------
        RecordBatchReader
        """
        cdef:
            ArrowArrayStream* c_stream
            shared_ptr[CRecordBatchReader] c_reader
            RecordBatchReader imported

        c_stream = <ArrowArrayStream*>PyCapsule_GetPointer(
            stream, 'arrow_array_stream'
        )

        with nogil:
            c_reader = GetResultValue(ImportRecordBatchReader(c_stream))

        imported = RecordBatchReader.__new__(RecordBatchReader)
        imported.reader = c_reader
        return imported

    @staticmethod
    def from_stream(data, schema=None):
        """
        Create RecordBatchReader from an Arrow-compatible stream object.

        This accepts objects implementing the Arrow PyCapsule Protocol for
        streams, i.e. objects that have a ``__arrow_c_stream__`` method.

        Parameters
        ----------
        data : Arrow-compatible stream object
            Any object that implements the Arrow PyCapsule Protocol for
            streams.
        schema : Schema, default None
            The schema to which the stream should be casted, if supported
            by the stream object.

        Returns
        -------
        RecordBatchReader

        Raises
        ------
        TypeError
            If ``data`` has no ``__arrow_c_stream__`` method, or ``schema``
            is given and has no ``__arrow_c_schema__`` method.
        """
        if not hasattr(data, "__arrow_c_stream__"):
            raise TypeError(
                "Expected an object implementing the Arrow PyCapsule Protocol for "
                "streams (i.e. having a `__arrow_c_stream__` method), "
                f"got {type(data)!r}."
            )

        requested = None
        if schema is not None:
            if not hasattr(schema, "__arrow_c_schema__"):
                raise TypeError(
                    "Expected an object implementing the Arrow PyCapsule Protocol for "
                    "schema (i.e. having a `__arrow_c_schema__` method), "
                    f"got {type(schema)!r}."
                )
            requested = schema.__arrow_c_schema__()

        capsule = data.__arrow_c_stream__(requested)
        return RecordBatchReader._import_from_c_capsule(capsule)

    @staticmethod
    def from_batches(Schema schema not None, batches):
        """
        Create RecordBatchReader from an iterable of batches.

        Parameters
        ----------
        schema : Schema
            The shared schema of the record batches
        batches : Iterable[RecordBatch]
            The batches that this reader will return.

        Returns
        -------
        reader : RecordBatchReader
        """
        cdef:
            shared_ptr[CSchema] c_schema
            shared_ptr[CRecordBatchReader] c_reader
            RecordBatchReader created

        c_schema = pyarrow_unwrap_schema(schema)
        c_reader = GetResultValue(
            CPyRecordBatchReader.Make(c_schema, batches))

        created = RecordBatchReader.__new__(RecordBatchReader)
        created.reader = c_reader
        return created
|
| 989 |
+
|
| 990 |
+
|
| 991 |
+
cdef class _RecordBatchStreamReader(RecordBatchReader):
    """
    RecordBatchReader whose underlying C++ reader is created with
    ``CRecordBatchStreamReader.Open`` in :meth:`_open`.
    """
    cdef:
        shared_ptr[CInputStream] in_stream
        CIpcReadOptions options
        CRecordBatchStreamReader* stream_reader

    def __cinit__(self):
        pass

    def _open(self, source, IpcReadOptions options=IpcReadOptions(),
              MemoryPool memory_pool=None):
        """
        Open an IPC stream reader over ``source``.

        Parameters
        ----------
        source : object
            Input passed to ``_get_input_stream``.
        options : IpcReadOptions
            Read options; their C representation is copied onto this object.
        memory_pool : MemoryPool, default None
            Unboxed with ``maybe_unbox_memory_pool`` and stored on the
            copied options.
        """
        self.options = options.c_options
        self.options.memory_pool = maybe_unbox_memory_pool(memory_pool)
        _get_input_stream(source, &self.in_stream)
        with nogil:
            self.reader = GetResultValue(
                CRecordBatchStreamReader.Open(self.in_stream, self.options))
            # Keep a typed raw pointer to the same reader for `stats`.
            self.stream_reader = <CRecordBatchStreamReader*> self.reader.get()

    @property
    def stats(self):
        """
        Current IPC read statistics.
        """
        if not self.reader:
            raise ValueError("Operation on closed reader")
        return _wrap_read_stats(self.stream_reader.stats())
|
| 1018 |
+
|
| 1019 |
+
|
| 1020 |
+
cdef class _RecordBatchFileWriter(_RecordBatchStreamWriter):
    """
    Variant of the stream writer whose underlying C++ writer is created
    with ``MakeFileWriter``.
    """

    def _open(self, sink, Schema schema not None,
              IpcWriteOptions options=IpcWriteOptions()):
        """
        Create the underlying file writer for ``sink``.

        Parameters
        ----------
        sink : object
            Destination passed to ``get_writer`` to obtain the output stream.
        schema : Schema
            Schema handed to the file writer.
        options : IpcWriteOptions
            Write options; their C representation is copied onto this object.
        """
        cdef shared_ptr[COutputStream] c_sink

        self.options = options.c_options
        get_writer(sink, &c_sink)
        with nogil:
            self.writer = GetResultValue(
                MakeFileWriter(c_sink, schema.sp_schema, self.options))
|
| 1032 |
+
|
| 1033 |
+
# Plain two-field tuple type that the public class below subclasses.
_RecordBatchWithMetadata = namedtuple(
    'RecordBatchWithMetadata', ['batch', 'custom_metadata'])


class RecordBatchWithMetadata(_RecordBatchWithMetadata):
    """RecordBatch with its custom metadata

    Parameters
    ----------
    batch : RecordBatch
    custom_metadata : KeyValueMetadata
    """
    # Empty slots: instances carry no per-instance ``__dict__``.
    __slots__ = ()
|
| 1047 |
+
|
| 1048 |
+
|
| 1049 |
+
cdef _wrap_record_batch_with_metadata(CRecordBatchWithMetadata c):
    # Convert the C++ (batch, custom_metadata) pair into its Python
    # counterpart. This is a module-level cdef function, so it carries no
    # ``@staticmethod`` decorator: that decorator only has meaning on
    # methods defined inside a class body.
    return RecordBatchWithMetadata(pyarrow_wrap_batch(c.batch),
                                   pyarrow_wrap_metadata(c.custom_metadata))
|
| 1053 |
+
|
| 1054 |
+
|
| 1055 |
+
cdef class _RecordBatchFileReader(_Weakrefable):
    """
    Reader giving indexed access to the record batches of an IPC file,
    backed by a ``CRecordBatchFileReader`` created in :meth:`_open`.
    """
    cdef:
        SharedPtrNoGIL[CRecordBatchFileReader] reader
        shared_ptr[CRandomAccessFile] file
        CIpcReadOptions options

    cdef readonly:
        Schema schema

    def __cinit__(self):
        pass

    def _open(self, source, footer_offset=None,
              IpcReadOptions options=IpcReadOptions(),
              MemoryPool memory_pool=None):
        """
        Open an IPC file reader over ``source`` and cache its schema.

        Parameters
        ----------
        source : object
            Buffer-like objects are converted with ``as_buffer``; anything
            else is handed to ``get_reader`` unchanged.
        footer_offset : int, default None
            When given and non-zero, forwarded to the C++ reader's
            ``Open2`` overload.
        options : IpcReadOptions
            Read options; their C representation is copied onto this object.
        memory_pool : MemoryPool, default None
            Unboxed with ``maybe_unbox_memory_pool`` and stored on the
            copied options.
        """
        cdef int64_t c_footer_offset = 0

        self.options = options.c_options
        self.options.memory_pool = maybe_unbox_memory_pool(memory_pool)
        try:
            source = as_buffer(source)
        except TypeError:
            # Not buffer-like: use the object as given.
            pass

        get_reader(source, False, &self.file)

        if footer_offset is not None:
            c_footer_offset = footer_offset

        with nogil:
            if c_footer_offset != 0:
                self.reader = GetResultValue(
                    CRecordBatchFileReader.Open2(self.file.get(),
                                                 c_footer_offset,
                                                 self.options))
            else:
                self.reader = GetResultValue(
                    CRecordBatchFileReader.Open(self.file.get(),
                                                self.options))

        self.schema = pyarrow_wrap_schema(self.reader.get().schema())

    @property
    def num_record_batches(self):
        """
        The number of record batches in the IPC file.
        """
        return self.reader.get().num_record_batches()

    def get_batch(self, int i):
        """
        Read the record batch with the given index.

        Parameters
        ----------
        i : int
            The index of the record batch in the IPC file.

        Returns
        -------
        batch : RecordBatch

        Raises
        ------
        ValueError
            If ``i`` is negative or not smaller than ``num_record_batches``.
        """
        cdef shared_ptr[CRecordBatch] c_batch

        if not (0 <= i < self.num_record_batches):
            raise ValueError('Batch number {0} out of range'.format(i))

        with nogil:
            c_batch = GetResultValue(self.reader.get().ReadRecordBatch(i))

        return pyarrow_wrap_batch(c_batch)

    # TODO(wesm): ARROW-503: Function was renamed. Remove after a period of
    # time has passed
    get_record_batch = get_batch

    def get_batch_with_custom_metadata(self, int i):
        """
        Read the record batch with the given index along with
        its custom metadata

        Parameters
        ----------
        i : int
            The index of the record batch in the IPC file.

        Returns
        -------
        batch : RecordBatch
        custom_metadata : KeyValueMetadata

        Raises
        ------
        ValueError
            If ``i`` is negative or not smaller than ``num_record_batches``.
        """
        cdef CRecordBatchWithMetadata c_item

        if not (0 <= i < self.num_record_batches):
            raise ValueError('Batch number {0} out of range'.format(i))

        with nogil:
            c_item = GetResultValue(
                self.reader.get().ReadRecordBatchWithCustomMetadata(i))

        return _wrap_record_batch_with_metadata(c_item)

    def read_all(self):
        """
        Read all record batches as a pyarrow.Table
        """
        cdef:
            vector[shared_ptr[CRecordBatch]] c_batches
            shared_ptr[CTable] c_table
            int idx, count

        count = self.num_record_batches

        # Size the vector up front so each slot can be assigned by index.
        c_batches.resize(count)
        with nogil:
            for idx in range(count):
                c_batches[idx] = GetResultValue(
                    self.reader.get().ReadRecordBatch(idx))
            c_table = GetResultValue(
                CTable.FromRecordBatches(self.schema.sp_schema,
                                         move(c_batches)))

        return pyarrow_wrap_table(c_table)

    read_pandas = _ReadPandasMixin.read_pandas

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing is released here; leaving the context is a no-op.
        pass

    @property
    def stats(self):
        """
        Current IPC read statistics.
        """
        if not self.reader:
            raise ValueError("Operation on closed reader")
        return _wrap_read_stats(self.reader.get().stats())
|
| 1194 |
+
|
| 1195 |
+
|
| 1196 |
+
def get_tensor_size(Tensor tensor):
    """
    Return total size of serialized Tensor including metadata and padding.

    Parameters
    ----------
    tensor : Tensor
        The tensor for which we want to know the size.
    """
    cdef int64_t nbytes
    with nogil:
        check_status(GetTensorSize(deref(tensor.tp), &nbytes))
    return nbytes
|
| 1209 |
+
|
| 1210 |
+
|
| 1211 |
+
def get_record_batch_size(RecordBatch batch):
    """
    Return total size of serialized RecordBatch including metadata and padding.

    Parameters
    ----------
    batch : RecordBatch
        The record batch for which we want to know the size.
    """
    cdef int64_t nbytes
    with nogil:
        check_status(GetRecordBatchSize(deref(batch.batch), &nbytes))
    return nbytes
|
| 1224 |
+
|
| 1225 |
+
|
| 1226 |
+
def write_tensor(Tensor tensor, NativeFile dest):
    """
    Write pyarrow.Tensor to pyarrow.NativeFile object at its current position.

    Parameters
    ----------
    tensor : pyarrow.Tensor
    dest : pyarrow.NativeFile

    Returns
    -------
    bytes_written : int
        Total number of bytes written to the file
    """
    cdef:
        int32_t metadata_length
        int64_t body_length

    # Fetch the output stream before releasing the GIL.
    handle = dest.get_output_stream()

    with nogil:
        check_status(
            WriteTensor(deref(tensor.tp), handle.get(),
                        &metadata_length, &body_length))

    # The reported total is the metadata length plus the body length.
    return metadata_length + body_length
|
| 1252 |
+
|
| 1253 |
+
|
| 1254 |
+
cdef NativeFile as_native_file(source):
    # Coerce `source` into a NativeFile.
    #
    # A NativeFile is returned unchanged. Anything exposing a `read`
    # attribute is wrapped in a PythonFile; every other object is handed
    # to BufferReader.
    if isinstance(source, NativeFile):
        return source

    if hasattr(source, 'read'):
        wrapped = PythonFile(source)
    else:
        wrapped = BufferReader(source)

    # Sanity check on the wrapper that was just built.
    if not isinstance(wrapped, NativeFile):
        raise ValueError('Unable to read message from object with type: {0}'
                         .format(type(wrapped)))
    return wrapped
|
| 1265 |
+
|
| 1266 |
+
|
| 1267 |
+
def read_tensor(source):
    """Read pyarrow.Tensor from pyarrow.NativeFile object from current
    position. If the file source supports zero copy (e.g. a memory map), then
    this operation does not allocate any memory. This function does not
    assume that the stream is aligned.

    Parameters
    ----------
    source : pyarrow.NativeFile

    Returns
    -------
    tensor : Tensor

    """
    cdef:
        shared_ptr[CTensor] sp_tensor
        CInputStream* c_stream
        # `source` is first normalized through as_native_file.
        NativeFile nf = as_native_file(source)

    c_stream = nf.get_input_stream().get()
    # The C++ read runs with the GIL released.
    with nogil:
        sp_tensor = GetResultValue(ReadTensor(c_stream))
    return pyarrow_wrap_tensor(sp_tensor)
|
| 1291 |
+
|
| 1292 |
+
|
| 1293 |
+
def read_message(source):
    """
    Read length-prefixed message from file or buffer-like object

    Parameters
    ----------
    source : pyarrow.NativeFile, file-like object, or buffer-like object

    Returns
    -------
    message : Message

    Raises
    ------
    EOFError
        If the read produced a null message.
    """
    cdef:
        # Message.__new__ creates the wrapper without running __init__.
        Message result = Message.__new__(Message)
        CInputStream* c_stream

    cdef NativeFile nf = as_native_file(source)
    c_stream = nf.get_input_stream().get()

    # The C++ read runs with the GIL released, using the default memory pool.
    with nogil:
        result.message = move(
            GetResultValue(ReadMessage(c_stream, c_default_memory_pool())))

    # A null message is reported to the caller as end of stream.
    if result.message == nullptr:
        raise EOFError("End of Arrow stream")

    return result
|
| 1320 |
+
|
| 1321 |
+
|
| 1322 |
+
def read_schema(obj, DictionaryMemo dictionary_memo=None):
    """
    Read Schema from message or buffer

    Parameters
    ----------
    obj : buffer or Message
    dictionary_memo : DictionaryMemo, optional
        Needed to be able to reconstruct dictionary-encoded fields
        with read_record_batch

    Returns
    -------
    schema : Schema
    """
    cdef:
        shared_ptr[CSchema] result
        shared_ptr[CRandomAccessFile] cpp_file
        Message message
        CDictionaryMemo temp_memo
        CDictionaryMemo* arg_dict_memo

    # Use the caller's memo when one was given; otherwise fall back to a
    # memo that is local to this call.
    if dictionary_memo is not None:
        arg_dict_memo = dictionary_memo.memo
    else:
        arg_dict_memo = &temp_memo

    if isinstance(obj, Message):
        message = obj
        with nogil:
            result = GetResultValue(ReadSchema(
                deref(message.message.get()), arg_dict_memo))
    else:
        # Anything that is not a Message is opened through get_reader.
        get_reader(obj, False, &cpp_file)
        with nogil:
            result = GetResultValue(ReadSchema(cpp_file.get(), arg_dict_memo))

    return pyarrow_wrap_schema(result)
|
| 1360 |
+
|
| 1361 |
+
|
| 1362 |
+
def read_record_batch(obj, Schema schema,
                      DictionaryMemo dictionary_memo=None):
    """
    Read RecordBatch from message, given a known schema. If reading data from a
    complete IPC stream, use ipc.open_stream instead

    Parameters
    ----------
    obj : Message or Buffer-like
    schema : Schema
    dictionary_memo : DictionaryMemo, optional
        If message contains dictionaries, must pass a populated
        DictionaryMemo

    Returns
    -------
    batch : RecordBatch
    """
    cdef:
        shared_ptr[CRecordBatch] result
        Message message
        CDictionaryMemo temp_memo
        CDictionaryMemo* arg_dict_memo

    # Non-Message inputs are first parsed into a Message by read_message.
    if isinstance(obj, Message):
        message = obj
    else:
        message = read_message(obj)

    # Use the caller's memo when one was given; otherwise fall back to a
    # memo that is local to this call.
    if dictionary_memo is not None:
        arg_dict_memo = dictionary_memo.memo
    else:
        arg_dict_memo = &temp_memo

    # The batch is decoded with the GIL released, using default read options.
    with nogil:
        result = GetResultValue(
            ReadRecordBatch(deref(message.message.get()),
                            schema.sp_schema,
                            arg_dict_memo,
                            CIpcReadOptions.Defaults()))

    return pyarrow_wrap_batch(result)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# Arrow file and stream reader/writer classes, and other messaging tools
|
| 19 |
+
|
| 20 |
+
import os
|
| 21 |
+
|
| 22 |
+
import pyarrow as pa
|
| 23 |
+
|
| 24 |
+
from pyarrow.lib import (IpcReadOptions, IpcWriteOptions, ReadStats, WriteStats, # noqa
|
| 25 |
+
Message, MessageReader,
|
| 26 |
+
RecordBatchReader, _ReadPandasMixin,
|
| 27 |
+
MetadataVersion,
|
| 28 |
+
read_message, read_record_batch, read_schema,
|
| 29 |
+
read_tensor, write_tensor,
|
| 30 |
+
get_record_batch_size, get_tensor_size)
|
| 31 |
+
import pyarrow.lib as lib
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class RecordBatchStreamReader(lib._RecordBatchStreamReader):
    """
    Reader for data in the Arrow streaming binary format.

    Parameters
    ----------
    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
        An in-memory buffer or a readable file object.
        Pass a MemoryMappedFile as source to read through a memory map.
    options : pyarrow.ipc.IpcReadOptions
        Options for IPC deserialization.
        Default values are used when this is None.
    memory_pool : MemoryPool, default None
        The default memory pool is used when this is None.
    """

    def __init__(self, source, *, options=None, memory_pool=None):
        # Validate the options (or substitute defaults) before opening.
        read_options = _ensure_default_ipc_read_options(options)
        self._open(source, options=read_options, memory_pool=memory_pool)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
_ipc_writer_class_doc = """\
|
| 56 |
+
Parameters
|
| 57 |
+
----------
|
| 58 |
+
sink : str, pyarrow.NativeFile, or file-like Python object
|
| 59 |
+
Either a file path, or a writable file object.
|
| 60 |
+
schema : pyarrow.Schema
|
| 61 |
+
The Arrow schema for data to be written to the file.
|
| 62 |
+
use_legacy_format : bool, default None
|
| 63 |
+
Deprecated in favor of setting options. Cannot be provided with
|
| 64 |
+
options.
|
| 65 |
+
|
| 66 |
+
If None, False will be used unless this default is overridden by
|
| 67 |
+
setting the environment variable ARROW_PRE_0_15_IPC_FORMAT=1
|
| 68 |
+
options : pyarrow.ipc.IpcWriteOptions
|
| 69 |
+
Options for IPC serialization.
|
| 70 |
+
|
| 71 |
+
If None, default values will be used: the legacy format will not
|
| 72 |
+
be used unless overridden by setting the environment variable
|
| 73 |
+
ARROW_PRE_0_15_IPC_FORMAT=1, and the V5 metadata version will be
|
| 74 |
+
used unless overridden by setting the environment variable
|
| 75 |
+
ARROW_PRE_1_0_METADATA_VERSION=1."""
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
class RecordBatchStreamWriter(lib._RecordBatchStreamWriter):
    __doc__ = """Writer for the Arrow streaming binary format

{}""".format(_ipc_writer_class_doc)

    def __init__(self, sink, schema, *, use_legacy_format=None, options=None):
        # Reconcile the deprecated flag with `options` into one options
        # object before opening the sink.
        write_options = _get_legacy_format_default(use_legacy_format, options)
        self._open(sink, schema, options=write_options)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class RecordBatchFileReader(lib._RecordBatchFileReader):
    """
    Reader for Arrow record batch data stored in the Arrow binary file format.

    Parameters
    ----------
    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
        An in-memory buffer or a readable file object.
        Pass a MemoryMappedFile as source to read through a memory map.
    footer_offset : int, default None
        When the file is embedded in some larger file, the byte offset to
        the very end of the file data.
    options : pyarrow.ipc.IpcReadOptions
        Options for IPC deserialization.
        Default values are used when this is None.
    memory_pool : MemoryPool, default None
        The default memory pool is used when this is None.
    """

    def __init__(self, source, footer_offset=None, *, options=None,
                 memory_pool=None):
        # Validate the options (or substitute defaults) before opening.
        read_options = _ensure_default_ipc_read_options(options)
        self._open(
            source,
            footer_offset=footer_offset,
            options=read_options,
            memory_pool=memory_pool,
        )
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class RecordBatchFileWriter(lib._RecordBatchFileWriter):

    __doc__ = """Writer to create the Arrow binary file format

{}""".format(_ipc_writer_class_doc)

    def __init__(self, sink, schema, *, use_legacy_format=None, options=None):
        # Reconcile the deprecated flag with `options` into one options
        # object before opening the sink.
        write_options = _get_legacy_format_default(use_legacy_format, options)
        self._open(sink, schema, options=write_options)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def _get_legacy_format_default(use_legacy_format, options):
|
| 126 |
+
if use_legacy_format is not None and options is not None:
|
| 127 |
+
raise ValueError(
|
| 128 |
+
"Can provide at most one of options and use_legacy_format")
|
| 129 |
+
elif options:
|
| 130 |
+
if not isinstance(options, IpcWriteOptions):
|
| 131 |
+
raise TypeError("expected IpcWriteOptions, got {}"
|
| 132 |
+
.format(type(options)))
|
| 133 |
+
return options
|
| 134 |
+
|
| 135 |
+
metadata_version = MetadataVersion.V5
|
| 136 |
+
if use_legacy_format is None:
|
| 137 |
+
use_legacy_format = \
|
| 138 |
+
bool(int(os.environ.get('ARROW_PRE_0_15_IPC_FORMAT', '0')))
|
| 139 |
+
if bool(int(os.environ.get('ARROW_PRE_1_0_METADATA_VERSION', '0'))):
|
| 140 |
+
metadata_version = MetadataVersion.V4
|
| 141 |
+
return IpcWriteOptions(use_legacy_format=use_legacy_format,
|
| 142 |
+
metadata_version=metadata_version)
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _ensure_default_ipc_read_options(options):
    """
    Return valid IPC read options, substituting defaults for None.

    Parameters
    ----------
    options : IpcReadOptions or None

    Returns
    -------
    IpcReadOptions
        ``options`` itself, or a default-constructed IpcReadOptions when
        ``options`` is None.

    Raises
    ------
    TypeError
        If ``options`` is neither None nor an IpcReadOptions.
    """
    # Test against None rather than truthiness, so that a falsy value of
    # the wrong type (0, "", {}, ...) is rejected instead of being silently
    # replaced by the defaults.
    if options is None:
        return IpcReadOptions()
    if not isinstance(options, IpcReadOptions):
        raise TypeError(
            "expected IpcReadOptions, got {}".format(type(options))
        )
    return options
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def new_stream(sink, schema, *, use_legacy_format=None, options=None):
    # Thin factory around RecordBatchStreamWriter; the documentation string
    # is attached below so it can embed the shared parameter section.
    writer = RecordBatchStreamWriter(
        sink,
        schema,
        use_legacy_format=use_legacy_format,
        options=options,
    )
    return writer


new_stream.__doc__ = """\
Create an Arrow columnar IPC stream writer instance

{}

Returns
-------
writer : RecordBatchStreamWriter
    A writer for the given sink
""".format(_ipc_writer_class_doc)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def open_stream(source, *, options=None, memory_pool=None):
    """
    Open a reader over data in the Arrow streaming format.

    Parameters
    ----------
    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
        An in-memory buffer or a readable file object.
    options : pyarrow.ipc.IpcReadOptions
        Options for IPC deserialization.
        Default values are used when this is None.
    memory_pool : MemoryPool, default None
        The default memory pool is used when this is None.

    Returns
    -------
    reader : RecordBatchStreamReader
        A reader for the given source
    """
    reader = RecordBatchStreamReader(
        source,
        options=options,
        memory_pool=memory_pool,
    )
    return reader
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def new_file(sink, schema, *, use_legacy_format=None, options=None):
    # Thin factory around RecordBatchFileWriter; the documentation string
    # is attached below so it can embed the shared parameter section.
    writer = RecordBatchFileWriter(
        sink,
        schema,
        use_legacy_format=use_legacy_format,
        options=options,
    )
    return writer


new_file.__doc__ = """\
Create an Arrow columnar IPC file writer instance

{}

Returns
-------
writer : RecordBatchFileWriter
    A writer for the given sink
""".format(_ipc_writer_class_doc)
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def open_file(source, footer_offset=None, *, options=None, memory_pool=None):
    """
    Open a reader over data in the Arrow file format.

    Parameters
    ----------
    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
        An in-memory buffer or a readable file object.
    footer_offset : int, default None
        When the file is embedded in some larger file, the byte offset to
        the very end of the file data.
    options : pyarrow.ipc.IpcReadOptions
        Options for IPC deserialization.
        Default values are used when this is None.
    memory_pool : MemoryPool, default None
        The default memory pool is used when this is None.

    Returns
    -------
    reader : RecordBatchFileReader
        A reader for the given source
    """
    reader = RecordBatchFileReader(
        source,
        footer_offset=footer_offset,
        options=options,
        memory_pool=memory_pool,
    )
    return reader
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def serialize_pandas(df, *, nthreads=None, preserve_index=None):
    """
    Turn a pandas DataFrame into an object supporting the buffer protocol.

    The frame is converted to a single RecordBatch, which is written in the
    Arrow streaming format to an in-memory output stream.

    Parameters
    ----------
    df : pandas.DataFrame
    nthreads : int, default None
        Number of threads to use for conversion to Arrow, default all CPUs.
    preserve_index : bool, default None
        With the default of None the index is stored as a column, except
        for RangeIndex, which is stored as metadata only. If True, the
        pandas index data is always preserved as a column. If False, no
        index information is saved and the result will have a default
        RangeIndex.

    Returns
    -------
    buf : buffer
        An object compatible with the buffer protocol.
    """
    record_batch = pa.RecordBatch.from_pandas(
        df, nthreads=nthreads, preserve_index=preserve_index)
    out_stream = pa.BufferOutputStream()
    with pa.RecordBatchStreamWriter(out_stream,
                                    record_batch.schema) as stream_writer:
        stream_writer.write_batch(record_batch)
    return out_stream.getvalue()
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def deserialize_pandas(buf, *, use_threads=True):
    """Rebuild a pandas DataFrame from a buffer protocol compatible object.

    The buffer is read as an Arrow stream, all of its record batches are
    collected into one table, and that table is converted to pandas.

    Parameters
    ----------
    buf : buffer
        An object compatible with the buffer protocol.
    use_threads : bool, default True
        Whether to parallelize the conversion using multiple threads.

    Returns
    -------
    df : pandas.DataFrame
        The buffer deserialized as pandas DataFrame
    """
    source = pa.BufferReader(buf)
    with pa.RecordBatchStreamReader(source) as stream_reader:
        arrow_table = stream_reader.read_all()
    return arrow_table.to_pandas(use_threads=use_threads)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/jvm.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
"""
|
| 18 |
+
Functions to interact with Arrow memory allocated by Arrow Java.
|
| 19 |
+
|
| 20 |
+
These functions convert the objects holding the metadata, the actual
|
| 21 |
+
data is not copied at all.
|
| 22 |
+
|
| 23 |
+
This will only work with a JVM running in the same process such as provided
|
| 24 |
+
through jpype. Modules that talk to a remote JVM like py4j will not work as the
|
| 25 |
+
memory addresses reported by them are not reachable in the python process.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
import pyarrow as pa
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class _JvmBufferNanny:
|
| 32 |
+
"""
|
| 33 |
+
An object that keeps a org.apache.arrow.memory.ArrowBuf's underlying
|
| 34 |
+
memory alive.
|
| 35 |
+
"""
|
| 36 |
+
ref_manager = None
|
| 37 |
+
|
| 38 |
+
def __init__(self, jvm_buf):
|
| 39 |
+
ref_manager = jvm_buf.getReferenceManager()
|
| 40 |
+
# Will raise a java.lang.IllegalArgumentException if the buffer
|
| 41 |
+
# is already freed. It seems that exception cannot easily be
|
| 42 |
+
# caught...
|
| 43 |
+
ref_manager.retain()
|
| 44 |
+
self.ref_manager = ref_manager
|
| 45 |
+
|
| 46 |
+
def __del__(self):
|
| 47 |
+
if self.ref_manager is not None:
|
| 48 |
+
self.ref_manager.release()
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def jvm_buffer(jvm_buf):
    """
    Wrap an org.apache.arrow.memory.ArrowBuf as an Arrow buffer.

    Parameters
    ----------

    jvm_buf: org.apache.arrow.memory.ArrowBuf
        Arrow Buffer representation on the JVM.

    Returns
    -------
    pyarrow.Buffer
        Python Buffer that references the JVM memory.
    """
    # Create the nanny first, so the JVM buffer is retained before its
    # address and capacity are read; it is passed on as the buffer's base.
    keeper = _JvmBufferNanny(jvm_buf)
    return pa.foreign_buffer(
        jvm_buf.memoryAddress(), jvm_buf.capacity(), base=keeper)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _from_jvm_int_type(jvm_type):
|
| 73 |
+
"""
|
| 74 |
+
Convert a JVM int type to its Python equivalent.
|
| 75 |
+
|
| 76 |
+
Parameters
|
| 77 |
+
----------
|
| 78 |
+
jvm_type : org.apache.arrow.vector.types.pojo.ArrowType$Int
|
| 79 |
+
|
| 80 |
+
Returns
|
| 81 |
+
-------
|
| 82 |
+
typ : pyarrow.DataType
|
| 83 |
+
"""
|
| 84 |
+
|
| 85 |
+
bit_width = jvm_type.getBitWidth()
|
| 86 |
+
if jvm_type.getIsSigned():
|
| 87 |
+
if bit_width == 8:
|
| 88 |
+
return pa.int8()
|
| 89 |
+
elif bit_width == 16:
|
| 90 |
+
return pa.int16()
|
| 91 |
+
elif bit_width == 32:
|
| 92 |
+
return pa.int32()
|
| 93 |
+
elif bit_width == 64:
|
| 94 |
+
return pa.int64()
|
| 95 |
+
else:
|
| 96 |
+
if bit_width == 8:
|
| 97 |
+
return pa.uint8()
|
| 98 |
+
elif bit_width == 16:
|
| 99 |
+
return pa.uint16()
|
| 100 |
+
elif bit_width == 32:
|
| 101 |
+
return pa.uint32()
|
| 102 |
+
elif bit_width == 64:
|
| 103 |
+
return pa.uint64()
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _from_jvm_float_type(jvm_type):
|
| 107 |
+
"""
|
| 108 |
+
Convert a JVM float type to its Python equivalent.
|
| 109 |
+
|
| 110 |
+
Parameters
|
| 111 |
+
----------
|
| 112 |
+
jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$FloatingPoint
|
| 113 |
+
|
| 114 |
+
Returns
|
| 115 |
+
-------
|
| 116 |
+
typ: pyarrow.DataType
|
| 117 |
+
"""
|
| 118 |
+
precision = jvm_type.getPrecision().toString()
|
| 119 |
+
if precision == 'HALF':
|
| 120 |
+
return pa.float16()
|
| 121 |
+
elif precision == 'SINGLE':
|
| 122 |
+
return pa.float32()
|
| 123 |
+
elif precision == 'DOUBLE':
|
| 124 |
+
return pa.float64()
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def _from_jvm_time_type(jvm_type):
|
| 128 |
+
"""
|
| 129 |
+
Convert a JVM time type to its Python equivalent.
|
| 130 |
+
|
| 131 |
+
Parameters
|
| 132 |
+
----------
|
| 133 |
+
jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Time
|
| 134 |
+
|
| 135 |
+
Returns
|
| 136 |
+
-------
|
| 137 |
+
typ: pyarrow.DataType
|
| 138 |
+
"""
|
| 139 |
+
time_unit = jvm_type.getUnit().toString()
|
| 140 |
+
if time_unit == 'SECOND':
|
| 141 |
+
assert jvm_type.getBitWidth() == 32
|
| 142 |
+
return pa.time32('s')
|
| 143 |
+
elif time_unit == 'MILLISECOND':
|
| 144 |
+
assert jvm_type.getBitWidth() == 32
|
| 145 |
+
return pa.time32('ms')
|
| 146 |
+
elif time_unit == 'MICROSECOND':
|
| 147 |
+
assert jvm_type.getBitWidth() == 64
|
| 148 |
+
return pa.time64('us')
|
| 149 |
+
elif time_unit == 'NANOSECOND':
|
| 150 |
+
assert jvm_type.getBitWidth() == 64
|
| 151 |
+
return pa.time64('ns')
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def _from_jvm_timestamp_type(jvm_type):
|
| 155 |
+
"""
|
| 156 |
+
Convert a JVM timestamp type to its Python equivalent.
|
| 157 |
+
|
| 158 |
+
Parameters
|
| 159 |
+
----------
|
| 160 |
+
jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Timestamp
|
| 161 |
+
|
| 162 |
+
Returns
|
| 163 |
+
-------
|
| 164 |
+
typ: pyarrow.DataType
|
| 165 |
+
"""
|
| 166 |
+
time_unit = jvm_type.getUnit().toString()
|
| 167 |
+
timezone = jvm_type.getTimezone()
|
| 168 |
+
if timezone is not None:
|
| 169 |
+
timezone = str(timezone)
|
| 170 |
+
if time_unit == 'SECOND':
|
| 171 |
+
return pa.timestamp('s', tz=timezone)
|
| 172 |
+
elif time_unit == 'MILLISECOND':
|
| 173 |
+
return pa.timestamp('ms', tz=timezone)
|
| 174 |
+
elif time_unit == 'MICROSECOND':
|
| 175 |
+
return pa.timestamp('us', tz=timezone)
|
| 176 |
+
elif time_unit == 'NANOSECOND':
|
| 177 |
+
return pa.timestamp('ns', tz=timezone)
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def _from_jvm_date_type(jvm_type):
|
| 181 |
+
"""
|
| 182 |
+
Convert a JVM date type to its Python equivalent
|
| 183 |
+
|
| 184 |
+
Parameters
|
| 185 |
+
----------
|
| 186 |
+
jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Date
|
| 187 |
+
|
| 188 |
+
Returns
|
| 189 |
+
-------
|
| 190 |
+
typ: pyarrow.DataType
|
| 191 |
+
"""
|
| 192 |
+
day_unit = jvm_type.getUnit().toString()
|
| 193 |
+
if day_unit == 'DAY':
|
| 194 |
+
return pa.date32()
|
| 195 |
+
elif day_unit == 'MILLISECOND':
|
| 196 |
+
return pa.date64()
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def field(jvm_field):
    """
    Build a pyarrow Field from an
    org.apache.arrow.vector.types.pojo.Field instance.

    Parameters
    ----------
    jvm_field: org.apache.arrow.vector.types.pojo.Field

    Returns
    -------
    pyarrow.Field

    Raises
    ------
    NotImplementedError
        If the JVM type is complex, or is a primitive type without a
        conversion here.
    """
    name = str(jvm_field.getName())
    jvm_type = jvm_field.getType()

    # TODO: The following JVM types are not implemented:
    #       Struct, List, FixedSizeList, Union, Dictionary
    if jvm_type.isComplex():
        raise NotImplementedError(
            "JVM field conversion only implemented for primitive types.")

    type_str = jvm_type.getTypeID().toString()
    # (JVM type id, converter taking the JVM type), checked in order.
    converters = (
        ('Null', lambda _t: pa.null()),
        ('Int', _from_jvm_int_type),
        ('FloatingPoint', _from_jvm_float_type),
        ('Utf8', lambda _t: pa.string()),
        ('Binary', lambda _t: pa.binary()),
        ('FixedSizeBinary', lambda t: pa.binary(t.getByteWidth())),
        ('Bool', lambda _t: pa.bool_()),
        ('Time', _from_jvm_time_type),
        ('Timestamp', _from_jvm_timestamp_type),
        ('Date', _from_jvm_date_type),
        ('Decimal',
         lambda t: pa.decimal128(t.getPrecision(), t.getScale())),
    )
    for type_name, convert in converters:
        if type_str == type_name:
            typ = convert(jvm_type)
            break
    else:
        raise NotImplementedError(
            "Unsupported JVM type: {}".format(type_str))

    nullable = jvm_field.isNullable()
    jvm_metadata = jvm_field.getMetadata()
    # An empty JVM metadata map is passed on as None.
    metadata = None if jvm_metadata.isEmpty() else {
        str(entry.getKey()): str(entry.getValue())
        for entry in jvm_metadata.entrySet()
    }
    return pa.field(name, typ, nullable, metadata)
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def schema(jvm_schema):
    """
    Build a pyarrow Schema from an
    org.apache.arrow.vector.types.pojo.Schema instance.

    Parameters
    ----------
    jvm_schema: org.apache.arrow.vector.types.pojo.Schema

    Returns
    -------
    pyarrow.Schema
    """
    converted_fields = [field(jvm_field)
                        for jvm_field in jvm_schema.getFields()]
    custom_metadata = jvm_schema.getCustomMetadata()
    # An empty JVM metadata map is passed on as None.
    metadata = None if custom_metadata.isEmpty() else {
        str(entry.getKey()): str(entry.getValue())
        for entry in custom_metadata.entrySet()
    }
    return pa.schema(converted_fields, metadata)
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def array(jvm_array):
|
| 284 |
+
"""
|
| 285 |
+
Construct an (Python) Array from its JVM equivalent.
|
| 286 |
+
|
| 287 |
+
Parameters
|
| 288 |
+
----------
|
| 289 |
+
jvm_array : org.apache.arrow.vector.ValueVector
|
| 290 |
+
|
| 291 |
+
Returns
|
| 292 |
+
-------
|
| 293 |
+
array : Array
|
| 294 |
+
"""
|
| 295 |
+
if jvm_array.getField().getType().isComplex():
|
| 296 |
+
minor_type_str = jvm_array.getMinorType().toString()
|
| 297 |
+
raise NotImplementedError(
|
| 298 |
+
"Cannot convert JVM Arrow array of type {},"
|
| 299 |
+
" complex types not yet implemented.".format(minor_type_str))
|
| 300 |
+
dtype = field(jvm_array.getField()).type
|
| 301 |
+
buffers = [jvm_buffer(buf)
|
| 302 |
+
for buf in list(jvm_array.getBuffers(False))]
|
| 303 |
+
|
| 304 |
+
# If JVM has an empty Vector, buffer list will be empty so create manually
|
| 305 |
+
if len(buffers) == 0:
|
| 306 |
+
return pa.array([], type=dtype)
|
| 307 |
+
|
| 308 |
+
length = jvm_array.getValueCount()
|
| 309 |
+
null_count = jvm_array.getNullCount()
|
| 310 |
+
return pa.Array.from_buffers(dtype, length, buffers, null_count)
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def record_batch(jvm_vector_schema_root):
|
| 314 |
+
"""
|
| 315 |
+
Construct a (Python) RecordBatch from a JVM VectorSchemaRoot
|
| 316 |
+
|
| 317 |
+
Parameters
|
| 318 |
+
----------
|
| 319 |
+
jvm_vector_schema_root : org.apache.arrow.vector.VectorSchemaRoot
|
| 320 |
+
|
| 321 |
+
Returns
|
| 322 |
+
-------
|
| 323 |
+
record_batch: pyarrow.RecordBatch
|
| 324 |
+
"""
|
| 325 |
+
pa_schema = schema(jvm_vector_schema_root.getSchema())
|
| 326 |
+
|
| 327 |
+
arrays = []
|
| 328 |
+
for name in pa_schema.names:
|
| 329 |
+
arrays.append(array(jvm_vector_schema_root.getVector(name)))
|
| 330 |
+
|
| 331 |
+
return pa.RecordBatch.from_arrays(
|
| 332 |
+
arrays,
|
| 333 |
+
pa_schema.names,
|
| 334 |
+
metadata=pa_schema.metadata
|
| 335 |
+
)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib.pxd
ADDED
|
@@ -0,0 +1,770 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
| 2 |
+
# or more contributor license agreements. See the NOTICE file
|
| 3 |
+
# distributed with this work for additional information
|
| 4 |
+
# regarding copyright ownership. The ASF licenses this file
|
| 5 |
+
# to you under the Apache License, Version 2.0 (the
|
| 6 |
+
# "License"); you may not use this file except in compliance
|
| 7 |
+
# with the License. You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing,
|
| 12 |
+
# software distributed under the License is distributed on an
|
| 13 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 14 |
+
# KIND, either express or implied. See the License for the
|
| 15 |
+
# specific language governing permissions and limitations
|
| 16 |
+
# under the License.
|
| 17 |
+
|
| 18 |
+
# cython: language_level = 3
|
| 19 |
+
|
| 20 |
+
from cpython cimport PyObject
|
| 21 |
+
from libcpp cimport nullptr, bool as c_bool
|
| 22 |
+
from libcpp.cast cimport dynamic_cast
|
| 23 |
+
from libcpp.memory cimport dynamic_pointer_cast
|
| 24 |
+
from pyarrow.includes.common cimport *
|
| 25 |
+
from pyarrow.includes.libarrow cimport *
|
| 26 |
+
from pyarrow.includes.libarrow_python cimport *
|
| 27 |
+
|
| 28 |
+
# Will be available in Cython 3, not backported
|
| 29 |
+
# ref: https://github.com/cython/cython/issues/3293#issuecomment-1223058101
|
| 30 |
+
cdef extern from "<optional>" namespace "std" nogil:
|
| 31 |
+
cdef cppclass nullopt_t:
|
| 32 |
+
nullopt_t()
|
| 33 |
+
|
| 34 |
+
cdef nullopt_t nullopt
|
| 35 |
+
|
| 36 |
+
cdef cppclass optional[T]:
|
| 37 |
+
ctypedef T value_type
|
| 38 |
+
optional()
|
| 39 |
+
optional(nullopt_t)
|
| 40 |
+
optional(optional&) except +
|
| 41 |
+
optional(T&) except +
|
| 42 |
+
c_bool has_value()
|
| 43 |
+
T& value()
|
| 44 |
+
T& value_or[U](U& default_value)
|
| 45 |
+
void swap(optional&)
|
| 46 |
+
void reset()
|
| 47 |
+
T& emplace(...)
|
| 48 |
+
T& operator*()
|
| 49 |
+
# T* operator->() # Not Supported
|
| 50 |
+
optional& operator=(optional&)
|
| 51 |
+
optional& operator=[U](U&)
|
| 52 |
+
c_bool operator bool()
|
| 53 |
+
c_bool operator!()
|
| 54 |
+
c_bool operator==[U](optional&, U&)
|
| 55 |
+
c_bool operator!=[U](optional&, U&)
|
| 56 |
+
c_bool operator<[U](optional&, U&)
|
| 57 |
+
c_bool operator>[U](optional&, U&)
|
| 58 |
+
c_bool operator<=[U](optional&, U&)
|
| 59 |
+
c_bool operator>=[U](optional&, U&)
|
| 60 |
+
|
| 61 |
+
optional[T] make_optional[T](...) except +
|
| 62 |
+
|
| 63 |
+
cdef extern from "Python.h":
|
| 64 |
+
int PySlice_Check(object)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
cdef int check_status(const CStatus& status) except -1 nogil
|
| 68 |
+
cdef object convert_status(const CStatus& status)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
cdef class _Weakrefable:
|
| 72 |
+
cdef object __weakref__
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
cdef class IpcWriteOptions(_Weakrefable):
|
| 76 |
+
cdef:
|
| 77 |
+
CIpcWriteOptions c_options
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
cdef class IpcReadOptions(_Weakrefable):
|
| 81 |
+
cdef:
|
| 82 |
+
CIpcReadOptions c_options
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
cdef class Message(_Weakrefable):
|
| 86 |
+
cdef:
|
| 87 |
+
unique_ptr[CMessage] message
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
cdef class MemoryPool(_Weakrefable):
|
| 91 |
+
cdef:
|
| 92 |
+
CMemoryPool* pool
|
| 93 |
+
|
| 94 |
+
cdef void init(self, CMemoryPool* pool)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
cdef object box_memory_pool(CMemoryPool* pool)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
cdef class DataType(_Weakrefable):
|
| 104 |
+
cdef:
|
| 105 |
+
shared_ptr[CDataType] sp_type
|
| 106 |
+
CDataType* type
|
| 107 |
+
bytes pep3118_format
|
| 108 |
+
|
| 109 |
+
cdef void init(self, const shared_ptr[CDataType]& type) except *
|
| 110 |
+
cpdef Field field(self, i)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
cdef class ListType(DataType):
|
| 114 |
+
cdef:
|
| 115 |
+
const CListType* list_type
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
cdef class LargeListType(DataType):
|
| 119 |
+
cdef:
|
| 120 |
+
const CLargeListType* list_type
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
cdef class ListViewType(DataType):
|
| 124 |
+
cdef:
|
| 125 |
+
const CListViewType* list_view_type
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
cdef class LargeListViewType(DataType):
|
| 129 |
+
cdef:
|
| 130 |
+
const CLargeListViewType* list_view_type
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
cdef class MapType(DataType):
|
| 134 |
+
cdef:
|
| 135 |
+
const CMapType* map_type
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
cdef class FixedSizeListType(DataType):
|
| 139 |
+
cdef:
|
| 140 |
+
const CFixedSizeListType* list_type
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
cdef class StructType(DataType):
|
| 144 |
+
cdef:
|
| 145 |
+
const CStructType* struct_type
|
| 146 |
+
|
| 147 |
+
cdef Field field_by_name(self, name)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
cdef class DictionaryMemo(_Weakrefable):
|
| 151 |
+
cdef:
|
| 152 |
+
# Even though the CDictionaryMemo instance is private, we allocate
|
| 153 |
+
# it on the heap so as to avoid C++ ABI issues with Python wheels.
|
| 154 |
+
shared_ptr[CDictionaryMemo] sp_memo
|
| 155 |
+
CDictionaryMemo* memo
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
cdef class DictionaryType(DataType):
|
| 159 |
+
cdef:
|
| 160 |
+
const CDictionaryType* dict_type
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
cdef class TimestampType(DataType):
|
| 164 |
+
cdef:
|
| 165 |
+
const CTimestampType* ts_type
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
cdef class Time32Type(DataType):
|
| 169 |
+
cdef:
|
| 170 |
+
const CTime32Type* time_type
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
cdef class Time64Type(DataType):
|
| 174 |
+
cdef:
|
| 175 |
+
const CTime64Type* time_type
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
cdef class DurationType(DataType):
|
| 179 |
+
cdef:
|
| 180 |
+
const CDurationType* duration_type
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
cdef class FixedSizeBinaryType(DataType):
|
| 184 |
+
cdef:
|
| 185 |
+
const CFixedSizeBinaryType* fixed_size_binary_type
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
cdef class Decimal32Type(FixedSizeBinaryType):
|
| 189 |
+
cdef:
|
| 190 |
+
const CDecimal32Type* decimal32_type
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
cdef class Decimal64Type(FixedSizeBinaryType):
|
| 194 |
+
cdef:
|
| 195 |
+
const CDecimal64Type* decimal64_type
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
cdef class Decimal128Type(FixedSizeBinaryType):
|
| 199 |
+
cdef:
|
| 200 |
+
const CDecimal128Type* decimal128_type
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
cdef class Decimal256Type(FixedSizeBinaryType):
|
| 204 |
+
cdef:
|
| 205 |
+
const CDecimal256Type* decimal256_type
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
cdef class RunEndEncodedType(DataType):
|
| 209 |
+
cdef:
|
| 210 |
+
const CRunEndEncodedType* run_end_encoded_type
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
cdef class BaseExtensionType(DataType):
|
| 214 |
+
cdef:
|
| 215 |
+
const CExtensionType* ext_type
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
cdef class ExtensionType(BaseExtensionType):
|
| 219 |
+
cdef:
|
| 220 |
+
const CPyExtensionType* cpy_ext_type
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
cdef class FixedShapeTensorType(BaseExtensionType):
|
| 224 |
+
cdef:
|
| 225 |
+
const CFixedShapeTensorType* tensor_ext_type
|
| 226 |
+
|
| 227 |
+
cdef class Bool8Type(BaseExtensionType):
|
| 228 |
+
cdef:
|
| 229 |
+
const CBool8Type* bool8_ext_type
|
| 230 |
+
|
| 231 |
+
cdef class OpaqueType(BaseExtensionType):
|
| 232 |
+
cdef:
|
| 233 |
+
const COpaqueType* opaque_ext_type
|
| 234 |
+
|
| 235 |
+
cdef class UuidType(BaseExtensionType):
|
| 236 |
+
cdef:
|
| 237 |
+
const CUuidType* uuid_ext_type
|
| 238 |
+
|
| 239 |
+
cdef class JsonType(BaseExtensionType):
|
| 240 |
+
cdef:
|
| 241 |
+
const CJsonType* json_ext_type
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
cdef class PyExtensionType(ExtensionType):
|
| 245 |
+
pass
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
cdef class _Metadata(_Weakrefable):
|
| 249 |
+
# required because KeyValueMetadata also extends collections.abc.Mapping
|
| 250 |
+
# and the first parent class must be an extension type
|
| 251 |
+
pass
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
cdef class KeyValueMetadata(_Metadata):
|
| 255 |
+
cdef:
|
| 256 |
+
shared_ptr[const CKeyValueMetadata] wrapped
|
| 257 |
+
const CKeyValueMetadata* metadata
|
| 258 |
+
|
| 259 |
+
cdef void init(self, const shared_ptr[const CKeyValueMetadata]& wrapped)
|
| 260 |
+
|
| 261 |
+
@staticmethod
|
| 262 |
+
cdef wrap(const shared_ptr[const CKeyValueMetadata]& sp)
|
| 263 |
+
cdef inline shared_ptr[const CKeyValueMetadata] unwrap(self) nogil
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
cdef class Field(_Weakrefable):
|
| 267 |
+
cdef:
|
| 268 |
+
shared_ptr[CField] sp_field
|
| 269 |
+
CField* field
|
| 270 |
+
|
| 271 |
+
cdef readonly:
|
| 272 |
+
DataType type
|
| 273 |
+
|
| 274 |
+
cdef void init(self, const shared_ptr[CField]& field)
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
cdef class Schema(_Weakrefable):
|
| 278 |
+
cdef:
|
| 279 |
+
shared_ptr[CSchema] sp_schema
|
| 280 |
+
CSchema* schema
|
| 281 |
+
|
| 282 |
+
cdef void init(self, const vector[shared_ptr[CField]]& fields)
|
| 283 |
+
cdef void init_schema(self, const shared_ptr[CSchema]& schema)
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
cdef class Scalar(_Weakrefable):
|
| 287 |
+
cdef:
|
| 288 |
+
shared_ptr[CScalar] wrapped
|
| 289 |
+
|
| 290 |
+
cdef void init(self, const shared_ptr[CScalar]& wrapped)
|
| 291 |
+
|
| 292 |
+
@staticmethod
|
| 293 |
+
cdef wrap(const shared_ptr[CScalar]& wrapped)
|
| 294 |
+
|
| 295 |
+
cdef inline shared_ptr[CScalar] unwrap(self) nogil
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
cdef class _PandasConvertible(_Weakrefable):
|
| 299 |
+
pass
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
cdef class Array(_PandasConvertible):
|
| 303 |
+
cdef:
|
| 304 |
+
shared_ptr[CArray] sp_array
|
| 305 |
+
CArray* ap
|
| 306 |
+
|
| 307 |
+
cdef readonly:
|
| 308 |
+
DataType type
|
| 309 |
+
# To allow Table to propagate metadata to pandas.Series
|
| 310 |
+
object _name
|
| 311 |
+
|
| 312 |
+
cdef void init(self, const shared_ptr[CArray]& sp_array) except *
|
| 313 |
+
cdef getitem(self, int64_t i)
|
| 314 |
+
cdef int64_t length(self)
|
| 315 |
+
cdef void _assert_cpu(self) except *
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
cdef class Tensor(_Weakrefable):
|
| 319 |
+
cdef:
|
| 320 |
+
shared_ptr[CTensor] sp_tensor
|
| 321 |
+
CTensor* tp
|
| 322 |
+
|
| 323 |
+
cdef readonly:
|
| 324 |
+
DataType type
|
| 325 |
+
bytes _ssize_t_shape
|
| 326 |
+
bytes _ssize_t_strides
|
| 327 |
+
|
| 328 |
+
cdef void init(self, const shared_ptr[CTensor]& sp_tensor)
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
cdef class SparseCSRMatrix(_Weakrefable):
|
| 332 |
+
cdef:
|
| 333 |
+
shared_ptr[CSparseCSRMatrix] sp_sparse_tensor
|
| 334 |
+
CSparseCSRMatrix* stp
|
| 335 |
+
|
| 336 |
+
cdef readonly:
|
| 337 |
+
DataType type
|
| 338 |
+
|
| 339 |
+
cdef void init(self, const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
cdef class SparseCSCMatrix(_Weakrefable):
|
| 343 |
+
cdef:
|
| 344 |
+
shared_ptr[CSparseCSCMatrix] sp_sparse_tensor
|
| 345 |
+
CSparseCSCMatrix* stp
|
| 346 |
+
|
| 347 |
+
cdef readonly:
|
| 348 |
+
DataType type
|
| 349 |
+
|
| 350 |
+
cdef void init(self, const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
cdef class SparseCOOTensor(_Weakrefable):
|
| 354 |
+
cdef:
|
| 355 |
+
shared_ptr[CSparseCOOTensor] sp_sparse_tensor
|
| 356 |
+
CSparseCOOTensor* stp
|
| 357 |
+
|
| 358 |
+
cdef readonly:
|
| 359 |
+
DataType type
|
| 360 |
+
|
| 361 |
+
cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
cdef class SparseCSFTensor(_Weakrefable):
|
| 365 |
+
cdef:
|
| 366 |
+
shared_ptr[CSparseCSFTensor] sp_sparse_tensor
|
| 367 |
+
CSparseCSFTensor* stp
|
| 368 |
+
|
| 369 |
+
cdef readonly:
|
| 370 |
+
DataType type
|
| 371 |
+
|
| 372 |
+
cdef void init(self, const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
cdef class NullArray(Array):
|
| 376 |
+
pass
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
cdef class BooleanArray(Array):
|
| 380 |
+
pass
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
cdef class NumericArray(Array):
|
| 384 |
+
pass
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
cdef class IntegerArray(NumericArray):
|
| 388 |
+
pass
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
cdef class FloatingPointArray(NumericArray):
|
| 392 |
+
pass
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
cdef class Int8Array(IntegerArray):
|
| 396 |
+
pass
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
cdef class UInt8Array(IntegerArray):
|
| 400 |
+
pass
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
cdef class Int16Array(IntegerArray):
|
| 404 |
+
pass
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
cdef class UInt16Array(IntegerArray):
|
| 408 |
+
pass
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
cdef class Int32Array(IntegerArray):
|
| 412 |
+
pass
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
cdef class UInt32Array(IntegerArray):
|
| 416 |
+
pass
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
cdef class Int64Array(IntegerArray):
|
| 420 |
+
pass
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
cdef class UInt64Array(IntegerArray):
|
| 424 |
+
pass
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
cdef class HalfFloatArray(FloatingPointArray):
|
| 428 |
+
pass
|
| 429 |
+
|
| 430 |
+
|
| 431 |
+
cdef class FloatArray(FloatingPointArray):
|
| 432 |
+
pass
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
cdef class DoubleArray(FloatingPointArray):
|
| 436 |
+
pass
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
cdef class FixedSizeBinaryArray(Array):
|
| 440 |
+
pass
|
| 441 |
+
|
| 442 |
+
|
| 443 |
+
cdef class Decimal32Array(FixedSizeBinaryArray):
|
| 444 |
+
pass
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
cdef class Decimal64Array(FixedSizeBinaryArray):
|
| 448 |
+
pass
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
cdef class Decimal128Array(FixedSizeBinaryArray):
|
| 452 |
+
pass
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
cdef class Decimal256Array(FixedSizeBinaryArray):
|
| 456 |
+
pass
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
cdef class StructArray(Array):
|
| 460 |
+
pass
|
| 461 |
+
|
| 462 |
+
|
| 463 |
+
cdef class BaseListArray(Array):
|
| 464 |
+
pass
|
| 465 |
+
|
| 466 |
+
|
| 467 |
+
cdef class ListArray(BaseListArray):
|
| 468 |
+
pass
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
cdef class LargeListArray(BaseListArray):
|
| 472 |
+
pass
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
cdef class ListViewArray(BaseListArray):
|
| 476 |
+
pass
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
cdef class LargeListViewArray(BaseListArray):
|
| 480 |
+
pass
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
cdef class MapArray(ListArray):
|
| 484 |
+
pass
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
cdef class FixedSizeListArray(BaseListArray):
|
| 488 |
+
pass
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
cdef class UnionArray(Array):
|
| 492 |
+
pass
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
cdef class StringArray(Array):
|
| 496 |
+
pass
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
cdef class BinaryArray(Array):
|
| 500 |
+
pass
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
cdef class StringViewArray(Array):
|
| 504 |
+
pass
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
cdef class BinaryViewArray(Array):
|
| 508 |
+
pass
|
| 509 |
+
|
| 510 |
+
|
| 511 |
+
cdef class DictionaryArray(Array):
|
| 512 |
+
cdef:
|
| 513 |
+
object _indices, _dictionary
|
| 514 |
+
|
| 515 |
+
|
| 516 |
+
cdef class ExtensionArray(Array):
|
| 517 |
+
pass
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
cdef class MonthDayNanoIntervalArray(Array):
|
| 521 |
+
pass
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
cdef wrap_array_output(PyObject* output)
|
| 525 |
+
cdef wrap_datum(const CDatum& datum)
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
cdef class ChunkedArray(_PandasConvertible):
|
| 529 |
+
cdef:
|
| 530 |
+
shared_ptr[CChunkedArray] sp_chunked_array
|
| 531 |
+
CChunkedArray* chunked_array
|
| 532 |
+
c_bool _is_cpu
|
| 533 |
+
c_bool _init_is_cpu
|
| 534 |
+
|
| 535 |
+
cdef readonly:
|
| 536 |
+
# To allow Table to propagate metadata to pandas.Series
|
| 537 |
+
object _name
|
| 538 |
+
|
| 539 |
+
cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
|
| 540 |
+
cdef getitem(self, int64_t i)
|
| 541 |
+
|
| 542 |
+
|
| 543 |
+
cdef class _Tabular(_PandasConvertible):
|
| 544 |
+
cdef void _assert_cpu(self) except *
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
cdef class Table(_Tabular):
|
| 548 |
+
cdef:
|
| 549 |
+
shared_ptr[CTable] sp_table
|
| 550 |
+
CTable* table
|
| 551 |
+
c_bool _is_cpu
|
| 552 |
+
c_bool _init_is_cpu
|
| 553 |
+
|
| 554 |
+
cdef void init(self, const shared_ptr[CTable]& table)
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
cdef class RecordBatch(_Tabular):
|
| 558 |
+
cdef:
|
| 559 |
+
shared_ptr[CRecordBatch] sp_batch
|
| 560 |
+
CRecordBatch* batch
|
| 561 |
+
Schema _schema
|
| 562 |
+
|
| 563 |
+
cdef void init(self, const shared_ptr[CRecordBatch]& table)
|
| 564 |
+
|
| 565 |
+
|
| 566 |
+
cdef class Device(_Weakrefable):
|
| 567 |
+
cdef:
|
| 568 |
+
shared_ptr[CDevice] device
|
| 569 |
+
|
| 570 |
+
cdef void init(self, const shared_ptr[CDevice]& device)
|
| 571 |
+
|
| 572 |
+
@staticmethod
|
| 573 |
+
cdef wrap(const shared_ptr[CDevice]& device)
|
| 574 |
+
|
| 575 |
+
cdef inline shared_ptr[CDevice] unwrap(self) nogil
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
cdef class MemoryManager(_Weakrefable):
|
| 579 |
+
cdef:
|
| 580 |
+
shared_ptr[CMemoryManager] memory_manager
|
| 581 |
+
|
| 582 |
+
cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager)
|
| 583 |
+
|
| 584 |
+
@staticmethod
|
| 585 |
+
cdef wrap(const shared_ptr[CMemoryManager]& mm)
|
| 586 |
+
|
| 587 |
+
cdef inline shared_ptr[CMemoryManager] unwrap(self) nogil
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
cdef class Buffer(_Weakrefable):
|
| 591 |
+
cdef:
|
| 592 |
+
shared_ptr[CBuffer] buffer
|
| 593 |
+
Py_ssize_t shape[1]
|
| 594 |
+
Py_ssize_t strides[1]
|
| 595 |
+
|
| 596 |
+
cdef void init(self, const shared_ptr[CBuffer]& buffer)
|
| 597 |
+
cdef getitem(self, int64_t i)
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
cdef class ResizableBuffer(Buffer):
|
| 601 |
+
|
| 602 |
+
cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer)
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
cdef class NativeFile(_Weakrefable):
|
| 606 |
+
cdef:
|
| 607 |
+
shared_ptr[CInputStream] input_stream
|
| 608 |
+
shared_ptr[CRandomAccessFile] random_access
|
| 609 |
+
shared_ptr[COutputStream] output_stream
|
| 610 |
+
bint is_readable
|
| 611 |
+
bint is_writable
|
| 612 |
+
bint is_seekable
|
| 613 |
+
bint _is_appending
|
| 614 |
+
bint own_file
|
| 615 |
+
|
| 616 |
+
# By implementing these "virtual" functions (all functions in Cython
|
| 617 |
+
# extension classes are technically virtual in the C++ sense) we can expose
|
| 618 |
+
# the arrow::io abstract file interfaces to other components throughout the
|
| 619 |
+
# suite of Arrow C++ libraries
|
| 620 |
+
cdef set_random_access_file(self, shared_ptr[CRandomAccessFile] handle)
|
| 621 |
+
cdef set_input_stream(self, shared_ptr[CInputStream] handle)
|
| 622 |
+
cdef set_output_stream(self, shared_ptr[COutputStream] handle)
|
| 623 |
+
|
| 624 |
+
cdef shared_ptr[CRandomAccessFile] get_random_access_file(self) except *
|
| 625 |
+
cdef shared_ptr[CInputStream] get_input_stream(self) except *
|
| 626 |
+
cdef shared_ptr[COutputStream] get_output_stream(self) except *
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
cdef class BufferedInputStream(NativeFile):
|
| 630 |
+
pass
|
| 631 |
+
|
| 632 |
+
|
| 633 |
+
cdef class BufferedOutputStream(NativeFile):
|
| 634 |
+
pass
|
| 635 |
+
|
| 636 |
+
|
| 637 |
+
cdef class CompressedInputStream(NativeFile):
|
| 638 |
+
pass
|
| 639 |
+
|
| 640 |
+
|
| 641 |
+
cdef class CompressedOutputStream(NativeFile):
|
| 642 |
+
pass
|
| 643 |
+
|
| 644 |
+
|
| 645 |
+
cdef class _CRecordBatchWriter(_Weakrefable):
|
| 646 |
+
cdef:
|
| 647 |
+
SharedPtrNoGIL[CRecordBatchWriter] writer
|
| 648 |
+
|
| 649 |
+
|
| 650 |
+
cdef class RecordBatchReader(_Weakrefable):
|
| 651 |
+
cdef:
|
| 652 |
+
SharedPtrNoGIL[CRecordBatchReader] reader
|
| 653 |
+
|
| 654 |
+
|
| 655 |
+
cdef class CacheOptions(_Weakrefable):
|
| 656 |
+
cdef:
|
| 657 |
+
CCacheOptions wrapped
|
| 658 |
+
|
| 659 |
+
cdef void init(self, CCacheOptions options)
|
| 660 |
+
|
| 661 |
+
cdef inline CCacheOptions unwrap(self)
|
| 662 |
+
|
| 663 |
+
@staticmethod
|
| 664 |
+
cdef wrap(const CCacheOptions options)
|
| 665 |
+
|
| 666 |
+
|
| 667 |
+
cdef class Codec(_Weakrefable):
|
| 668 |
+
cdef:
|
| 669 |
+
shared_ptr[CCodec] wrapped
|
| 670 |
+
|
| 671 |
+
cdef inline CCodec* unwrap(self) nogil
|
| 672 |
+
|
| 673 |
+
|
| 674 |
+
# This class is only used internally for now
|
| 675 |
+
cdef class StopToken:
|
| 676 |
+
cdef:
|
| 677 |
+
CStopToken stop_token
|
| 678 |
+
|
| 679 |
+
cdef void init(self, CStopToken stop_token)
|
| 680 |
+
|
| 681 |
+
|
| 682 |
+
cdef get_input_stream(object source, c_bool use_memory_map,
|
| 683 |
+
shared_ptr[CInputStream]* reader)
|
| 684 |
+
cdef get_reader(object source, c_bool use_memory_map,
|
| 685 |
+
shared_ptr[CRandomAccessFile]* reader)
|
| 686 |
+
cdef get_writer(object source, shared_ptr[COutputStream]* writer)
|
| 687 |
+
cdef NativeFile get_native_file(object source, c_bool use_memory_map)
|
| 688 |
+
|
| 689 |
+
cdef shared_ptr[CInputStream] native_transcoding_input_stream(
|
| 690 |
+
shared_ptr[CInputStream] stream, src_encoding,
|
| 691 |
+
dest_encoding) except *
|
| 692 |
+
|
| 693 |
+
cdef shared_ptr[function[StreamWrapFunc]] make_streamwrap_func(
|
| 694 |
+
src_encoding, dest_encoding) except *
|
| 695 |
+
|
| 696 |
+
# Default is allow_none=False
|
| 697 |
+
cpdef DataType ensure_type(object type, bint allow_none=*)
|
| 698 |
+
|
| 699 |
+
cdef timeunit_to_string(TimeUnit unit)
|
| 700 |
+
cdef TimeUnit string_to_timeunit(unit) except *
|
| 701 |
+
|
| 702 |
+
# Exceptions may be raised when converting dict values, so need to
|
| 703 |
+
# check exception state on return
|
| 704 |
+
cdef shared_ptr[const CKeyValueMetadata] pyarrow_unwrap_metadata(
|
| 705 |
+
object meta) except *
|
| 706 |
+
cdef object pyarrow_wrap_metadata(
|
| 707 |
+
const shared_ptr[const CKeyValueMetadata]& meta)
|
| 708 |
+
|
| 709 |
+
#
|
| 710 |
+
# Public Cython API for 3rd party code
|
| 711 |
+
#
|
| 712 |
+
# If you add functions to this list, please also update
|
| 713 |
+
# `cpp/src/arrow/python/pyarrow.{h, cc}`
|
| 714 |
+
#
|
| 715 |
+
|
| 716 |
+
# Wrapping C++ -> Python
|
| 717 |
+
|
| 718 |
+
cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf)
|
| 719 |
+
cdef public object pyarrow_wrap_resizable_buffer(
|
| 720 |
+
const shared_ptr[CResizableBuffer]& buf)
|
| 721 |
+
|
| 722 |
+
cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type)
|
| 723 |
+
cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field)
|
| 724 |
+
cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type)
|
| 725 |
+
|
| 726 |
+
cdef public object pyarrow_wrap_scalar(const shared_ptr[CScalar]& sp_scalar)
|
| 727 |
+
|
| 728 |
+
cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
|
| 729 |
+
cdef public object pyarrow_wrap_chunked_array(
|
| 730 |
+
const shared_ptr[CChunkedArray]& sp_array)
|
| 731 |
+
|
| 732 |
+
cdef public object pyarrow_wrap_sparse_coo_tensor(
|
| 733 |
+
const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
|
| 734 |
+
cdef public object pyarrow_wrap_sparse_csc_matrix(
|
| 735 |
+
const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
|
| 736 |
+
cdef public object pyarrow_wrap_sparse_csf_tensor(
|
| 737 |
+
const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
|
| 738 |
+
cdef public object pyarrow_wrap_sparse_csr_matrix(
|
| 739 |
+
const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
|
| 740 |
+
cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
|
| 741 |
+
|
| 742 |
+
cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
|
| 743 |
+
cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable)
|
| 744 |
+
|
| 745 |
+
# Unwrapping Python -> C++
|
| 746 |
+
|
| 747 |
+
cdef public shared_ptr[CBuffer] pyarrow_unwrap_buffer(object buffer)
|
| 748 |
+
|
| 749 |
+
cdef public shared_ptr[CDataType] pyarrow_unwrap_data_type(object data_type)
|
| 750 |
+
cdef public shared_ptr[CField] pyarrow_unwrap_field(object field)
|
| 751 |
+
cdef public shared_ptr[CSchema] pyarrow_unwrap_schema(object schema)
|
| 752 |
+
|
| 753 |
+
cdef public shared_ptr[CScalar] pyarrow_unwrap_scalar(object scalar)
|
| 754 |
+
|
| 755 |
+
cdef public shared_ptr[CArray] pyarrow_unwrap_array(object array)
|
| 756 |
+
cdef public shared_ptr[CChunkedArray] pyarrow_unwrap_chunked_array(
|
| 757 |
+
object array)
|
| 758 |
+
|
| 759 |
+
cdef public shared_ptr[CSparseCOOTensor] pyarrow_unwrap_sparse_coo_tensor(
|
| 760 |
+
object sparse_tensor)
|
| 761 |
+
cdef public shared_ptr[CSparseCSCMatrix] pyarrow_unwrap_sparse_csc_matrix(
|
| 762 |
+
object sparse_tensor)
|
| 763 |
+
cdef public shared_ptr[CSparseCSFTensor] pyarrow_unwrap_sparse_csf_tensor(
|
| 764 |
+
object sparse_tensor)
|
| 765 |
+
cdef public shared_ptr[CSparseCSRMatrix] pyarrow_unwrap_sparse_csr_matrix(
|
| 766 |
+
object sparse_tensor)
|
| 767 |
+
cdef public shared_ptr[CTensor] pyarrow_unwrap_tensor(object tensor)
|
| 768 |
+
|
| 769 |
+
cdef public shared_ptr[CRecordBatch] pyarrow_unwrap_batch(object batch)
|
| 770 |
+
cdef public shared_ptr[CTable] pyarrow_unwrap_table(object table)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib_api.h
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Generated by Cython 3.0.12 */
|
| 2 |
+
|
| 3 |
+
#ifndef __PYX_HAVE_API__pyarrow__lib
|
| 4 |
+
#define __PYX_HAVE_API__pyarrow__lib
|
| 5 |
+
#ifdef __MINGW64__
|
| 6 |
+
#define MS_WIN64
|
| 7 |
+
#endif
|
| 8 |
+
#include "Python.h"
|
| 9 |
+
#include "lib.h"
|
| 10 |
+
|
| 11 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_box_memory_pool)( arrow::MemoryPool *) = 0;
|
| 12 |
+
#define box_memory_pool __pyx_api_f_7pyarrow_3lib_box_memory_pool
|
| 13 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer)(std::shared_ptr< arrow::Buffer> const &) = 0;
|
| 14 |
+
#define pyarrow_wrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer
|
| 15 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer)(std::shared_ptr< arrow::ResizableBuffer> const &) = 0;
|
| 16 |
+
#define pyarrow_wrap_resizable_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer
|
| 17 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type)(std::shared_ptr< arrow::DataType> const &) = 0;
|
| 18 |
+
#define pyarrow_wrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type
|
| 19 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field)(std::shared_ptr< arrow::Field> const &) = 0;
|
| 20 |
+
#define pyarrow_wrap_field __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field
|
| 21 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema)(std::shared_ptr< arrow::Schema> const &) = 0;
|
| 22 |
+
#define pyarrow_wrap_schema __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema
|
| 23 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar)(std::shared_ptr< arrow::Scalar> const &) = 0;
|
| 24 |
+
#define pyarrow_wrap_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar
|
| 25 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array)(std::shared_ptr< arrow::Array> const &) = 0;
|
| 26 |
+
#define pyarrow_wrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array
|
| 27 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array)(std::shared_ptr< arrow::ChunkedArray> const &) = 0;
|
| 28 |
+
#define pyarrow_wrap_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array
|
| 29 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor)(std::shared_ptr< arrow::SparseCOOTensor> const &) = 0;
|
| 30 |
+
#define pyarrow_wrap_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor
|
| 31 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix)(std::shared_ptr< arrow::SparseCSCMatrix> const &) = 0;
|
| 32 |
+
#define pyarrow_wrap_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix
|
| 33 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor)(std::shared_ptr< arrow::SparseCSFTensor> const &) = 0;
|
| 34 |
+
#define pyarrow_wrap_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor
|
| 35 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix)(std::shared_ptr< arrow::SparseCSRMatrix> const &) = 0;
|
| 36 |
+
#define pyarrow_wrap_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix
|
| 37 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor)(std::shared_ptr< arrow::Tensor> const &) = 0;
|
| 38 |
+
#define pyarrow_wrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor
|
| 39 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch)(std::shared_ptr< arrow::RecordBatch> const &) = 0;
|
| 40 |
+
#define pyarrow_wrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch
|
| 41 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table)(std::shared_ptr< arrow::Table> const &) = 0;
|
| 42 |
+
#define pyarrow_wrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table
|
| 43 |
+
static std::shared_ptr< arrow::Buffer> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer)(PyObject *) = 0;
|
| 44 |
+
#define pyarrow_unwrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer
|
| 45 |
+
static std::shared_ptr< arrow::DataType> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type)(PyObject *) = 0;
|
| 46 |
+
#define pyarrow_unwrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type
|
| 47 |
+
static std::shared_ptr< arrow::Field> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field)(PyObject *) = 0;
|
| 48 |
+
#define pyarrow_unwrap_field __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field
|
| 49 |
+
static std::shared_ptr< arrow::Schema> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema)(PyObject *) = 0;
|
| 50 |
+
#define pyarrow_unwrap_schema __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema
|
| 51 |
+
static std::shared_ptr< arrow::Scalar> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar)(PyObject *) = 0;
|
| 52 |
+
#define pyarrow_unwrap_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar
|
| 53 |
+
static std::shared_ptr< arrow::Array> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array)(PyObject *) = 0;
|
| 54 |
+
#define pyarrow_unwrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array
|
| 55 |
+
static std::shared_ptr< arrow::ChunkedArray> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array)(PyObject *) = 0;
|
| 56 |
+
#define pyarrow_unwrap_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array
|
| 57 |
+
static std::shared_ptr< arrow::SparseCOOTensor> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor)(PyObject *) = 0;
|
| 58 |
+
#define pyarrow_unwrap_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor
|
| 59 |
+
static std::shared_ptr< arrow::SparseCSCMatrix> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix)(PyObject *) = 0;
|
| 60 |
+
#define pyarrow_unwrap_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix
|
| 61 |
+
static std::shared_ptr< arrow::SparseCSFTensor> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor)(PyObject *) = 0;
|
| 62 |
+
#define pyarrow_unwrap_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor
|
| 63 |
+
static std::shared_ptr< arrow::SparseCSRMatrix> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix)(PyObject *) = 0;
|
| 64 |
+
#define pyarrow_unwrap_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix
|
| 65 |
+
static std::shared_ptr< arrow::Tensor> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor)(PyObject *) = 0;
|
| 66 |
+
#define pyarrow_unwrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor
|
| 67 |
+
static std::shared_ptr< arrow::RecordBatch> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch)(PyObject *) = 0;
|
| 68 |
+
#define pyarrow_unwrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch
|
| 69 |
+
static std::shared_ptr< arrow::Table> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table)(PyObject *) = 0;
|
| 70 |
+
#define pyarrow_unwrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table
|
| 71 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status)(arrow::Status const &) = 0;
|
| 72 |
+
#define pyarrow_internal_check_status __pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status
|
| 73 |
+
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status)(arrow::Status const &) = 0;
|
| 74 |
+
#define pyarrow_internal_convert_status __pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status
|
| 75 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer)(PyObject *) = 0;
|
| 76 |
+
#define pyarrow_is_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer
|
| 77 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type)(PyObject *) = 0;
|
| 78 |
+
#define pyarrow_is_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type
|
| 79 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata)(PyObject *) = 0;
|
| 80 |
+
#define pyarrow_is_metadata __pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata
|
| 81 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_field)(PyObject *) = 0;
|
| 82 |
+
#define pyarrow_is_field __pyx_api_f_7pyarrow_3lib_pyarrow_is_field
|
| 83 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema)(PyObject *) = 0;
|
| 84 |
+
#define pyarrow_is_schema __pyx_api_f_7pyarrow_3lib_pyarrow_is_schema
|
| 85 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_array)(PyObject *) = 0;
|
| 86 |
+
#define pyarrow_is_array __pyx_api_f_7pyarrow_3lib_pyarrow_is_array
|
| 87 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array)(PyObject *) = 0;
|
| 88 |
+
#define pyarrow_is_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array
|
| 89 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar)(PyObject *) = 0;
|
| 90 |
+
#define pyarrow_is_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar
|
| 91 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor)(PyObject *) = 0;
|
| 92 |
+
#define pyarrow_is_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor
|
| 93 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor)(PyObject *) = 0;
|
| 94 |
+
#define pyarrow_is_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor
|
| 95 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix)(PyObject *) = 0;
|
| 96 |
+
#define pyarrow_is_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix
|
| 97 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix)(PyObject *) = 0;
|
| 98 |
+
#define pyarrow_is_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix
|
| 99 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor)(PyObject *) = 0;
|
| 100 |
+
#define pyarrow_is_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor
|
| 101 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_table)(PyObject *) = 0;
|
| 102 |
+
#define pyarrow_is_table __pyx_api_f_7pyarrow_3lib_pyarrow_is_table
|
| 103 |
+
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch)(PyObject *) = 0;
|
| 104 |
+
#define pyarrow_is_batch __pyx_api_f_7pyarrow_3lib_pyarrow_is_batch
|
| 105 |
+
#ifndef __PYX_HAVE_RT_ImportFunction_3_0_12
|
| 106 |
+
#define __PYX_HAVE_RT_ImportFunction_3_0_12
|
| 107 |
+
/*
 * Fetch one C function pointer exported by a Cython module.
 *
 * The module's "__pyx_capi__" attribute is read as a dict, `funcname` is
 * looked up in it, and the entry is accepted only if it is a capsule whose
 * name equals `sig`.  The capsule's pointer is then stored through `f`.
 *
 * Returns 0 on success.  Returns -1 when the attribute lookup fails, the name
 * is missing (ImportError is raised here), the capsule does not match `sig`
 * (TypeError is raised here), or the stored pointer is NULL.
 */
static int __Pyx_ImportFunction_3_0_12(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
    PyObject *capi_dict = 0;
    PyObject *capsule = 0;
    int status = -1;
    /* Union used to turn the capsule's `void *` into a function pointer
     * without a direct object-pointer -> function-pointer cast. */
    union {
        void (*fp)(void);
        void *p;
    } conv;

    capi_dict = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
    if (!capi_dict)
        return -1;

    /* `capsule` is never DECREF'd in this function: PyDict_GetItemString
     * hands back a borrowed reference. */
    capsule = PyDict_GetItemString(capi_dict, funcname);
    if (!capsule) {
        PyErr_Format(PyExc_ImportError,
            "%.200s does not export expected C function %.200s",
            PyModule_GetName(module), funcname);
    } else if (!PyCapsule_IsValid(capsule, sig)) {
        PyErr_Format(PyExc_TypeError,
            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
            PyModule_GetName(module), funcname, sig, PyCapsule_GetName(capsule));
    } else {
        conv.p = PyCapsule_GetPointer(capsule, sig);
        *f = conv.fp;
        /* A NULL pointer is still written to *f but reported as failure. */
        if (*f)
            status = 0;
    }

    Py_DECREF(capi_dict);
    return status;
}
|
| 140 |
+
#endif
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
static int import_pyarrow__lib(void) {
|
| 144 |
+
PyObject *module = 0;
|
| 145 |
+
module = PyImport_ImportModule("pyarrow.lib");
|
| 146 |
+
if (!module) goto bad;
|
| 147 |
+
if (__Pyx_ImportFunction_3_0_12(module, "box_memory_pool", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_box_memory_pool, "PyObject *( arrow::MemoryPool *)") < 0) goto bad;
|
| 148 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer> const &)") < 0) goto bad;
|
| 149 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_resizable_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer, "PyObject *(std::shared_ptr< arrow::ResizableBuffer> const &)") < 0) goto bad;
|
| 150 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type, "PyObject *(std::shared_ptr< arrow::DataType> const &)") < 0) goto bad;
|
| 151 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field, "PyObject *(std::shared_ptr< arrow::Field> const &)") < 0) goto bad;
|
| 152 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema, "PyObject *(std::shared_ptr< arrow::Schema> const &)") < 0) goto bad;
|
| 153 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar, "PyObject *(std::shared_ptr< arrow::Scalar> const &)") < 0) goto bad;
|
| 154 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array, "PyObject *(std::shared_ptr< arrow::Array> const &)") < 0) goto bad;
|
| 155 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array, "PyObject *(std::shared_ptr< arrow::ChunkedArray> const &)") < 0) goto bad;
|
| 156 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor, "PyObject *(std::shared_ptr< arrow::SparseCOOTensor> const &)") < 0) goto bad;
|
| 157 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSCMatrix> const &)") < 0) goto bad;
|
| 158 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor, "PyObject *(std::shared_ptr< arrow::SparseCSFTensor> const &)") < 0) goto bad;
|
| 159 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSRMatrix> const &)") < 0) goto bad;
|
| 160 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor, "PyObject *(std::shared_ptr< arrow::Tensor> const &)") < 0) goto bad;
|
| 161 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch> const &)") < 0) goto bad;
|
| 162 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table, "PyObject *(std::shared_ptr< arrow::Table> const &)") < 0) goto bad;
|
| 163 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer, "std::shared_ptr< arrow::Buffer> (PyObject *)") < 0) goto bad;
|
| 164 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type, "std::shared_ptr< arrow::DataType> (PyObject *)") < 0) goto bad;
|
| 165 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field, "std::shared_ptr< arrow::Field> (PyObject *)") < 0) goto bad;
|
| 166 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema, "std::shared_ptr< arrow::Schema> (PyObject *)") < 0) goto bad;
|
| 167 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar, "std::shared_ptr< arrow::Scalar> (PyObject *)") < 0) goto bad;
|
| 168 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array, "std::shared_ptr< arrow::Array> (PyObject *)") < 0) goto bad;
|
| 169 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array, "std::shared_ptr< arrow::ChunkedArray> (PyObject *)") < 0) goto bad;
|
| 170 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor, "std::shared_ptr< arrow::SparseCOOTensor> (PyObject *)") < 0) goto bad;
|
| 171 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix, "std::shared_ptr< arrow::SparseCSCMatrix> (PyObject *)") < 0) goto bad;
|
| 172 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor, "std::shared_ptr< arrow::SparseCSFTensor> (PyObject *)") < 0) goto bad;
|
| 173 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix, "std::shared_ptr< arrow::SparseCSRMatrix> (PyObject *)") < 0) goto bad;
|
| 174 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor, "std::shared_ptr< arrow::Tensor> (PyObject *)") < 0) goto bad;
|
| 175 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch, "std::shared_ptr< arrow::RecordBatch> (PyObject *)") < 0) goto bad;
|
| 176 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table, "std::shared_ptr< arrow::Table> (PyObject *)") < 0) goto bad;
|
| 177 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_internal_check_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status, "int (arrow::Status const &)") < 0) goto bad;
|
| 178 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_internal_convert_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status, "PyObject *(arrow::Status const &)") < 0) goto bad;
|
| 179 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer, "int (PyObject *)") < 0) goto bad;
|
| 180 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type, "int (PyObject *)") < 0) goto bad;
|
| 181 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_metadata", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata, "int (PyObject *)") < 0) goto bad;
|
| 182 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_field, "int (PyObject *)") < 0) goto bad;
|
| 183 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema, "int (PyObject *)") < 0) goto bad;
|
| 184 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_array, "int (PyObject *)") < 0) goto bad;
|
| 185 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array, "int (PyObject *)") < 0) goto bad;
|
| 186 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar, "int (PyObject *)") < 0) goto bad;
|
| 187 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor, "int (PyObject *)") < 0) goto bad;
|
| 188 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor, "int (PyObject *)") < 0) goto bad;
|
| 189 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix, "int (PyObject *)") < 0) goto bad;
|
| 190 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix, "int (PyObject *)") < 0) goto bad;
|
| 191 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor, "int (PyObject *)") < 0) goto bad;
|
| 192 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_table, "int (PyObject *)") < 0) goto bad;
|
| 193 |
+
if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch, "int (PyObject *)") < 0) goto bad;
|
| 194 |
+
Py_DECREF(module); module = 0;
|
| 195 |
+
return 0;
|
| 196 |
+
bad:
|
| 197 |
+
Py_XDECREF(module);
|
| 198 |
+
return -1;
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
#endif /* !__PYX_HAVE_API__pyarrow__lib */
|