diff --git a/aiohttp-3.13.3.dist-info/INSTALLER b/aiohttp-3.13.3.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..5c69047b2eb8235994febeeae1da4a82365a240a --- /dev/null +++ b/aiohttp-3.13.3.dist-info/INSTALLER @@ -0,0 +1 @@ +uv \ No newline at end of file diff --git a/aiohttp-3.13.3.dist-info/METADATA b/aiohttp-3.13.3.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..078765d91eb4b08b23a0d13c7b05a5b4946418b2 --- /dev/null +++ b/aiohttp-3.13.3.dist-info/METADATA @@ -0,0 +1,262 @@ +Metadata-Version: 2.4 +Name: aiohttp +Version: 3.13.3 +Summary: Async http client/server framework (asyncio) +Maintainer-email: aiohttp team +License: Apache-2.0 AND MIT +Project-URL: Homepage, https://github.com/aio-libs/aiohttp +Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org +Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org +Project-URL: CI: GitHub Actions, https://github.com/aio-libs/aiohttp/actions?query=workflow%3ACI +Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/aiohttp +Project-URL: Docs: Changelog, https://docs.aiohttp.org/en/stable/changes.html +Project-URL: Docs: RTD, https://docs.aiohttp.org +Project-URL: GitHub: issues, https://github.com/aio-libs/aiohttp/issues +Project-URL: GitHub: repo, https://github.com/aio-libs/aiohttp +Classifier: Development Status :: 5 - Production/Stable +Classifier: Framework :: AsyncIO +Classifier: Intended Audience :: Developers +Classifier: Operating System :: POSIX +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft :: Windows +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Topic :: Internet :: WWW/HTTP +Requires-Python: >=3.9 +Description-Content-Type: text/x-rst +License-File: LICENSE.txt +License-File: vendor/llhttp/LICENSE +Requires-Dist: aiohappyeyeballs>=2.5.0 +Requires-Dist: aiosignal>=1.4.0 +Requires-Dist: async-timeout<6.0,>=4.0; python_version < "3.11" +Requires-Dist: attrs>=17.3.0 +Requires-Dist: frozenlist>=1.1.1 +Requires-Dist: multidict<7.0,>=4.5 +Requires-Dist: propcache>=0.2.0 +Requires-Dist: yarl<2.0,>=1.17.0 +Provides-Extra: speedups +Requires-Dist: aiodns>=3.3.0; extra == "speedups" +Requires-Dist: Brotli>=1.2; platform_python_implementation == "CPython" and extra == "speedups" +Requires-Dist: brotlicffi>=1.2; platform_python_implementation != "CPython" and extra == "speedups" +Requires-Dist: backports.zstd; (platform_python_implementation == "CPython" and python_version < "3.14") and extra == "speedups" +Dynamic: license-file + +================================== +Async http client/server framework +================================== + +.. image:: https://raw.githubusercontent.com/aio-libs/aiohttp/master/docs/aiohttp-plain.svg + :height: 64px + :width: 64px + :alt: aiohttp logo + +| + +.. image:: https://github.com/aio-libs/aiohttp/workflows/CI/badge.svg + :target: https://github.com/aio-libs/aiohttp/actions?query=workflow%3ACI + :alt: GitHub Actions status for master branch + +.. image:: https://codecov.io/gh/aio-libs/aiohttp/branch/master/graph/badge.svg + :target: https://codecov.io/gh/aio-libs/aiohttp + :alt: codecov.io status for master branch + +.. image:: https://badge.fury.io/py/aiohttp.svg + :target: https://pypi.org/project/aiohttp + :alt: Latest PyPI package version + +.. image:: https://img.shields.io/pypi/dm/aiohttp + :target: https://pypistats.org/packages/aiohttp + :alt: Downloads count + +.. image:: https://readthedocs.org/projects/aiohttp/badge/?version=latest + :target: https://docs.aiohttp.org/ + :alt: Latest Read The Docs + +.. image:: https://img.shields.io/endpoint?url=https://codspeed.io/badge.json + :target: https://codspeed.io/aio-libs/aiohttp + :alt: Codspeed.io status for aiohttp + + +Key Features +============ + +- Supports both client and server side of HTTP protocol. +- Supports both client and server Web-Sockets out-of-the-box and avoids + Callback Hell. +- Provides Web-server with middleware and pluggable routing. + + +Getting started +=============== + +Client +------ + +To get something from the web: + +.. code-block:: python + + import aiohttp + import asyncio + + async def main(): + + async with aiohttp.ClientSession() as session: + async with session.get('http://python.org') as response: + + print("Status:", response.status) + print("Content-type:", response.headers['content-type']) + + html = await response.text() + print("Body:", html[:15], "...") + + asyncio.run(main()) + +This prints: + +.. code-block:: + + Status: 200 + Content-type: text/html; charset=utf-8 + Body: ... + +Coming from `requests `_ ? Read `why we need so many lines `_. + +Server +------ + +An example using a simple server: + +.. code-block:: python + + # examples/server_simple.py + from aiohttp import web + + async def handle(request): + name = request.match_info.get('name', "Anonymous") + text = "Hello, " + name + return web.Response(text=text) + + async def wshandle(request): + ws = web.WebSocketResponse() + await ws.prepare(request) + + async for msg in ws: + if msg.type == web.WSMsgType.text: + await ws.send_str("Hello, {}".format(msg.data)) + elif msg.type == web.WSMsgType.binary: + await ws.send_bytes(msg.data) + elif msg.type == web.WSMsgType.close: + break + + return ws + + + app = web.Application() + app.add_routes([web.get('/', handle), + web.get('/echo', wshandle), + web.get('/{name}', handle)]) + + if __name__ == '__main__': + web.run_app(app) + + +Documentation +============= + +https://aiohttp.readthedocs.io/ + + +Demos +===== + +https://github.com/aio-libs/aiohttp-demos + + +External links +============== + +* `Third party libraries + `_ +* `Built with aiohttp + `_ +* `Powered by aiohttp + `_ + +Feel free to make a Pull Request for adding your link to these pages! + + +Communication channels +====================== + +*aio-libs Discussions*: https://github.com/aio-libs/aiohttp/discussions + +*Matrix*: `#aio-libs:matrix.org `_ + +We support `Stack Overflow +`_. +Please add *aiohttp* tag to your question there. + +Requirements +============ + +- attrs_ +- multidict_ +- yarl_ +- frozenlist_ + +Optionally you may install the aiodns_ library (highly recommended for sake of speed). + +.. _aiodns: https://pypi.python.org/pypi/aiodns +.. _attrs: https://github.com/python-attrs/attrs +.. _multidict: https://pypi.python.org/pypi/multidict +.. _frozenlist: https://pypi.org/project/frozenlist/ +.. _yarl: https://pypi.python.org/pypi/yarl +.. _async-timeout: https://pypi.python.org/pypi/async_timeout + +License +======= + +``aiohttp`` is offered under the Apache 2 license. + + +Keepsafe +======== + +The aiohttp community would like to thank Keepsafe +(https://www.getkeepsafe.com) for its support in the early days of +the project. + + +Source code +=========== + +The latest developer version is available in a GitHub repository: +https://github.com/aio-libs/aiohttp + +Benchmarks +========== + +If you are interested in efficiency, the AsyncIO community maintains a +list of benchmarks on the official wiki: +https://github.com/python/asyncio/wiki/Benchmarks + +-------- + +.. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat + :target: https://matrix.to/#/%23aio-libs:matrix.org + :alt: Matrix Room — #aio-libs:matrix.org + +.. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat + :target: https://matrix.to/#/%23aio-libs-space:matrix.org + :alt: Matrix Space — #aio-libs-space:matrix.org + +.. image:: https://insights.linuxfoundation.org/api/badge/health-score?project=aiohttp + :target: https://insights.linuxfoundation.org/project/aiohttp + :alt: LFX Health Score diff --git a/aiohttp-3.13.3.dist-info/RECORD b/aiohttp-3.13.3.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..a285c5269deff01ab5459572a7b9452762688954 --- /dev/null +++ b/aiohttp-3.13.3.dist-info/RECORD @@ -0,0 +1,84 @@ +aiohttp-3.13.3.dist-info/INSTALLER,sha256=5hhM4Q4mYTT9z6QB6PGpUAW81PGNFrYrdXMj4oM_6ak,2 +aiohttp-3.13.3.dist-info/METADATA,sha256=CQROZCStho-eb7xiFIuAzj30JuupEU_jHpYDFiG_HhM,8145 +aiohttp-3.13.3.dist-info/RECORD,, +aiohttp-3.13.3.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +aiohttp-3.13.3.dist-info/WHEEL,sha256=DxRnWQz-Kp9-4a4hdDHsSv0KUC3H7sN9Nbef3-8RjXU,190 +aiohttp-3.13.3.dist-info/licenses/LICENSE.txt,sha256=n4DQ2311WpQdtFchcsJw7L2PCCuiFd3QlZhZQu2Uqes,588 +aiohttp-3.13.3.dist-info/licenses/vendor/llhttp/LICENSE,sha256=68qFTgE0zSVtZzYnwgSZ9CV363S6zwi58ltianPJEnc,1105 +aiohttp-3.13.3.dist-info/top_level.txt,sha256=iv-JIaacmTl-hSho3QmphcKnbRRYx1st47yjz_178Ro,8 +aiohttp/.hash/_cparser.pxd.hash,sha256=pjs-sEXNw_eijXGAedwG-BHnlFp8B7sOCgUagIWaU2A,121 +aiohttp/.hash/_find_header.pxd.hash,sha256=_mbpD6vM-CVCKq3ulUvsOAz5Wdo88wrDzfpOsMQaMNA,125 +aiohttp/.hash/_http_parser.pyx.hash,sha256=RKkD9x-EhXksvXrpCaTNWYtffb52urLvuTnxbTN2Lmw,125 +aiohttp/.hash/_http_writer.pyx.hash,sha256=9txOh7t7c3y-vLmiuEY5dltmXvEo0CYyU4U853yyv9E,125 +aiohttp/.hash/hdrs.py.hash,sha256=v6IaKbsxjsdQxBzhb5AjP0x_9G3rUe84D7avf7AI4cs,116 +aiohttp/__init__.py,sha256=QWssFaD-DaFFcwP36lLUQzRmlSZ5KxivJBU-yg5C1wg,8302 +aiohttp/_cookie_helpers.py,sha256=_p7y-B8OCAk7FLjByiuwFIpDLGuNoJn3_vixzymAFnE,13659 +aiohttp/_cparser.pxd,sha256=UnbUYCHg4NdXfgyRVYAMv2KTLWClB4P-xCrvtj_r7ew,4295 +aiohttp/_find_header.pxd,sha256=0GfwFCPN2zxEKTO1_MA5sYq2UfzsG8kcV3aTqvwlz3g,68 +aiohttp/_headers.pxi,sha256=n701k28dVPjwRnx5j6LpJhLTfj7dqu2vJt7f0O60Oyg,2007 +aiohttp/_http_parser.cpython-312-x86_64-linux-gnu.so,sha256=WZP45rtTvKwOq_1uXO_1L84Kz6I0AnqYZn0b5L-6HkA,2833152 +aiohttp/_http_parser.pyx,sha256=-YI8YIY4uKd_7Bwr0o3FwEPwjHdexZ5-Ji3XS067c4Q,28261 +aiohttp/_http_writer.cpython-312-x86_64-linux-gnu.so,sha256=mWC-4rsbntVD1V5ZEKEpSW_sm63V0XRe1fDR0lygipo,539144 +aiohttp/_http_writer.pyx,sha256=VlFEBM6HoVv8a0AAJtc6JwFlsv2-cDE8-gB94p3dfhQ,4664 +aiohttp/_websocket/.hash/mask.pxd.hash,sha256=Y0zBddk_ck3pi9-BFzMcpkcvCKvwvZ4GTtZFb9u1nxQ,128 +aiohttp/_websocket/.hash/mask.pyx.hash,sha256=90owpXYM8_kIma4KUcOxhWSk-Uv4NVMBoCYeFM1B3d0,128 +aiohttp/_websocket/.hash/reader_c.pxd.hash,sha256=5xf3oobk6vx4xbJm-xtZ1_QufB8fYFtLQV2MNdqUc1w,132 +aiohttp/_websocket/__init__.py,sha256=Mar3R9_vBN_Ea4lsW7iTAVXD7OKswKPGqF5xgSyt77k,44 +aiohttp/_websocket/helpers.py,sha256=P-XLv8IUaihKzDenVUqfKU5DJbWE5HvG8uhvUZK8Ic4,5038 +aiohttp/_websocket/mask.cpython-312-x86_64-linux-gnu.so,sha256=EpRwPJm1K1yavMCd9llAWdT4AsqKx_QEN0rb0eJH_Kc,263512 +aiohttp/_websocket/mask.pxd,sha256=sBmZ1Amym9kW4Ge8lj1fLZ7mPPya4LzLdpkQExQXv5M,112 +aiohttp/_websocket/mask.pyx,sha256=BHjOtV0O0w7xp9p0LNADRJvGmgfPn9sGeJvSs0fL__4,1397 +aiohttp/_websocket/models.py,sha256=XAzjs_8JYszWXIgZ6R3ZRrF-tX9Q_6LiD49WRYojopM,2121 +aiohttp/_websocket/reader.py,sha256=eC4qS0c5sOeQ2ebAHLaBpIaTVFaSKX79pY2xvh3Pqyw,1030 +aiohttp/_websocket/reader_c.cpython-312-x86_64-linux-gnu.so,sha256=GYd-y-IkGkOIp3vmma5BmZgmG9Py9fLTybcmMWyHNf0,1822128 +aiohttp/_websocket/reader_c.pxd,sha256=nl_njtDrzlQU0rjgGGjZDB-swguE0tX_bCPobkShVa4,2625 +aiohttp/_websocket/reader_c.py,sha256=V5YtZ2gj2BjE2Q-W9sR_MdAl1VAm1pB7ZjozVJcOpbg,18868 +aiohttp/_websocket/reader_py.py,sha256=V5YtZ2gj2BjE2Q-W9sR_MdAl1VAm1pB7ZjozVJcOpbg,18868 +aiohttp/_websocket/writer.py,sha256=2OvSktPmNh_g20h1cXJt2Xu8u6IvswnPjdur7OwBbJk,11261 +aiohttp/abc.py,sha256=M66F4S6m00bIEn7y4ha_XLTMDmVQ9dPihfOVB0pGfOo,7149 +aiohttp/base_protocol.py,sha256=Tp8cxUPQvv9kUPk3w6lAzk6d2MAzV3scwI_3Go3C47c,3025 +aiohttp/client.py,sha256=fOQfwcIUL1NGAVRV4DDj6-wipBzeD8KZpmzhO-LLKp4,58357 +aiohttp/client_exceptions.py,sha256=uyKbxI2peZhKl7lELBMx3UeusNkfpemPWpGFq0r6JeM,11367 +aiohttp/client_middleware_digest_auth.py,sha256=G5JM9YtzL9AWklz6NP28xEOBeAvrAZgDzU657JqO4qs,17627 +aiohttp/client_middlewares.py,sha256=kP5N9CMzQPMGPIEydeVUiLUTLsw8Vl8Gr4qAWYdu3vM,1918 +aiohttp/client_proto.py,sha256=56_WtLStZGBFPYKzgEgY6v24JkhV1y6JEmmuxeJT2So,12110 +aiohttp/client_reqrep.py,sha256=eEREDrZ0M8ZFTt1wjHduR-P8_sm40K65gNz-iMGYask,53391 +aiohttp/client_ws.py,sha256=1CIjIXwyzOMIYw6AjUES4-qUwbyVHW1seJKQfg_Rta8,15109 +aiohttp/compression_utils.py,sha256=hJ2LXhN2OWukFHm5b78TJFGKcAiL2kthi9Sf5PRYO-U,11738 +aiohttp/connector.py,sha256=vT22BNuCDtbadE1Uq7HC7zpOWCHMxI4n3PtCz7zZZkw,69004 +aiohttp/cookiejar.py,sha256=e28ZMQwJ5P0vbPX1OX4Se7-k3zeGvocFEqzGhwpG53k,18922 +aiohttp/formdata.py,sha256=xqYMbUo1qoLYPuzY92XeR4pyEe-w-DNcToARDF3GUhA,6384 +aiohttp/hdrs.py,sha256=2rj5MyA-6yRdYPhW5UKkW4iNWhEAlGIOSBH5D4FmKNE,5111 +aiohttp/helpers.py,sha256=Q1307PCEnWz4RP8crUw8dk58c0YF2Ei3JywkKfRxz5E,30629 +aiohttp/http.py,sha256=8o8j8xH70OWjnfTWA9V44NR785QPxEPrUtzMXiAVpwc,1842 +aiohttp/http_exceptions.py,sha256=BjIxD4LtrQgytqoR5lOI9zAttNmSygRgksUsMRy7sss,3069 +aiohttp/http_parser.py,sha256=z6djZDOUs7hdPzplTEsAVyz0of-rQAwT7xz8OpXhnuY,38177 +aiohttp/http_websocket.py,sha256=8VXFKw6KQUEmPg48GtRMB37v0gTK7A0inoxXuDxMZEc,842 +aiohttp/http_writer.py,sha256=fbRtKPYSqRbtAdr_gqpjF2-4sI1ESL8dPDF-xY_mAMY,12446 +aiohttp/log.py,sha256=BbNKx9e3VMIm0xYjZI0IcBBoS7wjdeIeSaiJE7-qK2g,325 +aiohttp/multipart.py,sha256=326npYdWxYI3raoRfmpBeUV_ef3-LRn8sV9WqcIOoPk,40482 +aiohttp/payload.py,sha256=O6nsYNULL7AeM2cyJ6TYX73ncVnL5xJwt5AegxwMKqw,40874 +aiohttp/payload_streamer.py,sha256=ZzEYyfzcjGWkVkK3XR2pBthSCSIykYvY3Wr5cGQ2eTc,2211 +aiohttp/py.typed,sha256=sow9soTwP9T_gEAQSVh7Gb8855h04Nwmhs2We-JRgZM,7 +aiohttp/pytest_plugin.py,sha256=z4XwqmsKdyJCKxbGiA5kFf90zcedvomqk4RqjZbhKNk,12901 +aiohttp/resolver.py,sha256=gsrfUpFf8iHlcHfJvY-1fiBHW3PRvRVNb5lNZBg3zlY,10031 +aiohttp/streams.py,sha256=rlwL7ek6CkMMYil_e_EokWv26uHmtzi3lKqlnLNrXCc,23666 +aiohttp/tcp_helpers.py,sha256=BSadqVWaBpMFDRWnhaaR941N9MiDZ7bdTrxgCb0CW-M,961 +aiohttp/test_utils.py,sha256=ZJSzZWjC76KSbtwddTKcP6vHpUl_ozfAf3F93ewmHRU,23016 +aiohttp/tracing.py,sha256=-6aaW6l0J9uJD45LzR4cijYH0j62pt0U_nn_aVzFku4,14558 +aiohttp/typedefs.py,sha256=wUlqwe9Mw9W8jT3HsYJcYk00qP3EMPz3nTkYXmeNN48,1657 +aiohttp/web.py,sha256=JzSNmejg5G6YeFAnkIgZfytqbU86sNu844yYKmoUpqs,17852 +aiohttp/web_app.py,sha256=lGU_aAMN-h3wy-LTTHi6SeKH8ydt1G51BXcCspgD5ZA,19452 +aiohttp/web_exceptions.py,sha256=7nIuiwhZ39vJJ9KrWqArA5QcWbUdqkz2CLwEpJapeN8,10360 +aiohttp/web_fileresponse.py,sha256=Xzau8EMrWNrFg3u46h4UEteg93G4zYq94CU6vy0HiqE,16362 +aiohttp/web_log.py,sha256=rX5D7xLOX2B6BMdiZ-chme_KfJfW5IXEoFwLfkfkajs,7865 +aiohttp/web_middlewares.py,sha256=sFI0AgeNjdyAjuz92QtMIpngmJSOxrqe2Jfbs4BNUu0,4165 +aiohttp/web_protocol.py,sha256=6s9dMzmaqW77bzM1T111uGNSLFo6gNmfDg7XzYnA8xk,27010 +aiohttp/web_request.py,sha256=KqrOp6AeWB5e6tKrG55Lo7Zbwq49DxdrKniuW2t2u04,29849 +aiohttp/web_response.py,sha256=PKcziNU4LmftXqKVvoRMrAbOeVClpSN-iznHsiWezmU,29341 +aiohttp/web_routedef.py,sha256=VT1GAx6BrawoDh5RwBwBu5wSABSqgWwAe74AUCyZAEo,6110 +aiohttp/web_runner.py,sha256=v1G1nKiOOQgFnTSR4IMc6I9ReEFDMaHtMLvO_roDM-A,11786 +aiohttp/web_server.py,sha256=-9WDKUAiR9ll-rSdwXSqG6YjaoW79d1R4y0BGSqgUMA,2888 +aiohttp/web_urldispatcher.py,sha256=JM-TlriKCNbTLNL43Ra9sdZ0zChxZmIEYQM6ZpbyjI4,44290 +aiohttp/web_ws.py,sha256=lItgmyatkXh0M6EY7JoZnSZkUl6R0wv8B88X4ILqQbU,22739 +aiohttp/worker.py,sha256=zT0iWN5Xze194bO6_VjHou0x7lR_k0MviN6Kadnk22g,8152 diff --git a/aiohttp-3.13.3.dist-info/REQUESTED b/aiohttp-3.13.3.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/aiohttp-3.13.3.dist-info/WHEEL b/aiohttp-3.13.3.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..f3e8a970f16adf4526f1722547053522d94bf860 --- /dev/null +++ b/aiohttp-3.13.3.dist-info/WHEEL @@ -0,0 +1,7 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: false +Tag: cp312-cp312-manylinux_2_17_x86_64 +Tag: cp312-cp312-manylinux2014_x86_64 +Tag: cp312-cp312-manylinux_2_28_x86_64 + diff --git a/aiohttp-3.13.3.dist-info/top_level.txt b/aiohttp-3.13.3.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..ee4ba4f3d739e094878215c84eb41ba85c80e4a8 --- /dev/null +++ b/aiohttp-3.13.3.dist-info/top_level.txt @@ -0,0 +1 @@ +aiohttp diff --git a/aiohttp/_cookie_helpers.py b/aiohttp/_cookie_helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..10e2e0eff9de9d28ecbfe511a1227a1fec1d83cd --- /dev/null +++ b/aiohttp/_cookie_helpers.py @@ -0,0 +1,338 @@ +""" +Internal cookie handling helpers. + +This module contains internal utilities for cookie parsing and manipulation. +These are not part of the public API and may change without notice. +""" + +import re +from http.cookies import Morsel +from typing import List, Optional, Sequence, Tuple, cast + +from .log import internal_logger + +__all__ = ( + "parse_set_cookie_headers", + "parse_cookie_header", + "preserve_morsel_with_coded_value", +) + +# Cookie parsing constants +# Allow more characters in cookie names to handle real-world cookies +# that don't strictly follow RFC standards (fixes #2683) +# RFC 6265 defines cookie-name token as per RFC 2616 Section 2.2, +# but many servers send cookies with characters like {} [] () etc. +# This makes the cookie parser more tolerant of real-world cookies +# while still providing some validation to catch obviously malformed names. +_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$") +_COOKIE_KNOWN_ATTRS = frozenset( # AKA Morsel._reserved + ( + "path", + "domain", + "max-age", + "expires", + "secure", + "httponly", + "samesite", + "partitioned", + "version", + "comment", + ) +) +_COOKIE_BOOL_ATTRS = frozenset( # AKA Morsel._flags + ("secure", "httponly", "partitioned") +) + +# SimpleCookie's pattern for parsing cookies with relaxed validation +# Based on http.cookies pattern but extended to allow more characters in cookie names +# to handle real-world cookies (fixes #2683) +_COOKIE_PATTERN = re.compile( + r""" + \s* # Optional whitespace at start of cookie + (?P # Start of group 'key' + # aiohttp has extended to include [] for compatibility with real-world cookies + [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\[\]]+ # Any word of at least one letter + ) # End of group 'key' + ( # Optional group: there may not be a value. + \s*=\s* # Equal Sign + (?P # Start of group 'val' + "(?:[^\\"]|\\.)*" # Any double-quoted string (properly closed) + | # or + "[^";]* # Unmatched opening quote (differs from SimpleCookie - issue #7993) + | # or + # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123 + (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day (with comma) + [\w\d\s-]{9,11}\s[\d:]{8}\s # Date and time in specific format + (GMT|[+-]\d{4}) # Timezone: GMT or RFC 2822 offset like -0000, +0100 + # NOTE: RFC 2822 timezone support is an aiohttp extension + # for issue #4493 - SimpleCookie does NOT support this + | # or + # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021" + # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format + \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4} + | # or + [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]* # Any word or empty string + ) # End of group 'val' + )? # End of optional value group + \s* # Any number of spaces. + (\s+|;|$) # Ending either at space, semicolon, or EOS. + """, + re.VERBOSE | re.ASCII, +) + + +def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]: + """ + Preserve a Morsel's coded_value exactly as received from the server. + + This function ensures that cookie encoding is preserved exactly as sent by + the server, which is critical for compatibility with old servers that have + strict requirements about cookie formats. + + This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453 + where Python's SimpleCookie would re-encode cookies, breaking authentication + with certain servers. + + Args: + cookie: A Morsel object from SimpleCookie + + Returns: + A Morsel object with preserved coded_value + + """ + mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) + # We use __setstate__ instead of the public set() API because it allows us to + # bypass validation and set already validated state. This is more stable than + # setting protected attributes directly and unlikely to change since it would + # break pickling. + mrsl_val.__setstate__( # type: ignore[attr-defined] + {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value} + ) + return mrsl_val + + +_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub + + +def _unquote_replace(m: re.Match[str]) -> str: + """ + Replace function for _unquote_sub regex substitution. + + Handles escaped characters in cookie values: + - Octal sequences are converted to their character representation + - Other escaped characters are unescaped by removing the backslash + """ + if m[1]: + return chr(int(m[1], 8)) + return m[2] + + +def _unquote(value: str) -> str: + """ + Unquote a cookie value. + + Vendored from http.cookies._unquote to ensure compatibility. + + Note: The original implementation checked for None, but we've removed + that check since all callers already ensure the value is not None. + """ + # If there aren't any doublequotes, + # then there can't be any special characters. See RFC 2109. + if len(value) < 2: + return value + if value[0] != '"' or value[-1] != '"': + return value + + # We have to assume that we must decode this string. + # Down to work. + + # Remove the "s + value = value[1:-1] + + # Check for special sequences. Examples: + # \012 --> \n + # \" --> " + # + return _unquote_sub(_unquote_replace, value) + + +def parse_cookie_header(header: str) -> List[Tuple[str, Morsel[str]]]: + """ + Parse a Cookie header according to RFC 6265 Section 5.4. + + Cookie headers contain only name-value pairs separated by semicolons. + There are no attributes in Cookie headers - even names that match + attribute names (like 'path' or 'secure') should be treated as cookies. + + This parser uses the same regex-based approach as parse_set_cookie_headers + to properly handle quoted values that may contain semicolons. When the + regex fails to match a malformed cookie, it falls back to simple parsing + to ensure subsequent cookies are not lost + https://github.com/aio-libs/aiohttp/issues/11632 + + Args: + header: The Cookie header value to parse + + Returns: + List of (name, Morsel) tuples for compatibility with SimpleCookie.update() + """ + if not header: + return [] + + cookies: List[Tuple[str, Morsel[str]]] = [] + morsel: Morsel[str] + i = 0 + n = len(header) + + invalid_names = [] + while i < n: + # Use the same pattern as parse_set_cookie_headers to find cookies + match = _COOKIE_PATTERN.match(header, i) + if not match: + # Fallback for malformed cookies https://github.com/aio-libs/aiohttp/issues/11632 + # Find next semicolon to skip or attempt simple key=value parsing + next_semi = header.find(";", i) + eq_pos = header.find("=", i) + + # Try to extract key=value if '=' comes before ';' + if eq_pos != -1 and (next_semi == -1 or eq_pos < next_semi): + end_pos = next_semi if next_semi != -1 else n + key = header[i:eq_pos].strip() + value = header[eq_pos + 1 : end_pos].strip() + + # Validate the name (same as regex path) + if not _COOKIE_NAME_RE.match(key): + invalid_names.append(key) + else: + morsel = Morsel() + morsel.__setstate__( # type: ignore[attr-defined] + {"key": key, "value": _unquote(value), "coded_value": value} + ) + cookies.append((key, morsel)) + + # Move to next cookie or end + i = next_semi + 1 if next_semi != -1 else n + continue + + key = match.group("key") + value = match.group("val") or "" + i = match.end(0) + + # Validate the name + if not key or not _COOKIE_NAME_RE.match(key): + invalid_names.append(key) + continue + + # Create new morsel + morsel = Morsel() + # Preserve the original value as coded_value (with quotes if present) + # We use __setstate__ instead of the public set() API because it allows us to + # bypass validation and set already validated state. This is more stable than + # setting protected attributes directly and unlikely to change since it would + # break pickling. + morsel.__setstate__( # type: ignore[attr-defined] + {"key": key, "value": _unquote(value), "coded_value": value} + ) + + cookies.append((key, morsel)) + + if invalid_names: + internal_logger.debug( + "Cannot load cookie. Illegal cookie names: %r", invalid_names + ) + + return cookies + + +def parse_set_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]: + """ + Parse cookie headers using a vendored version of SimpleCookie parsing. + + This implementation is based on SimpleCookie.__parse_string to ensure + compatibility with how SimpleCookie parses cookies, including handling + of malformed cookies with missing semicolons. + + This function is used for both Cookie and Set-Cookie headers in order to be + forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie + headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the + real world data makes it impossible since we need to be a bit more forgiving. + + NOTE: This implementation differs from SimpleCookie in handling unmatched quotes. + SimpleCookie will stop parsing when it encounters a cookie value with an unmatched + quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped. + This implementation handles unmatched quotes more gracefully to prevent cookie loss. + See https://github.com/aio-libs/aiohttp/issues/7993 + """ + parsed_cookies: List[Tuple[str, Morsel[str]]] = [] + + for header in headers: + if not header: + continue + + # Parse cookie string using SimpleCookie's algorithm + i = 0 + n = len(header) + current_morsel: Optional[Morsel[str]] = None + morsel_seen = False + + while 0 <= i < n: + # Start looking for a cookie + match = _COOKIE_PATTERN.match(header, i) + if not match: + # No more cookies + break + + key, value = match.group("key"), match.group("val") + i = match.end(0) + lower_key = key.lower() + + if key[0] == "$": + if not morsel_seen: + # We ignore attributes which pertain to the cookie + # mechanism as a whole, such as "$Version". + continue + # Process as attribute + if current_morsel is not None: + attr_lower_key = lower_key[1:] + if attr_lower_key in _COOKIE_KNOWN_ATTRS: + current_morsel[attr_lower_key] = value or "" + elif lower_key in _COOKIE_KNOWN_ATTRS: + if not morsel_seen: + # Invalid cookie string - attribute before cookie + break + if lower_key in _COOKIE_BOOL_ATTRS: + # Boolean attribute with any value should be True + if current_morsel is not None and current_morsel.isReservedKey(key): + current_morsel[lower_key] = True + elif value is None: + # Invalid cookie string - non-boolean attribute without value + break + elif current_morsel is not None: + # Regular attribute with value + current_morsel[lower_key] = _unquote(value) + elif value is not None: + # This is a cookie name=value pair + # Validate the name + if key in _COOKIE_KNOWN_ATTRS or not _COOKIE_NAME_RE.match(key): + internal_logger.warning( + "Can not load cookies: Illegal cookie name %r", key + ) + current_morsel = None + else: + # Create new morsel + current_morsel = Morsel() + # Preserve the original value as coded_value (with quotes if present) + # We use __setstate__ instead of the public set() API because it allows us to + # bypass validation and set already validated state. This is more stable than + # setting protected attributes directly and unlikely to change since it would + # break pickling. + current_morsel.__setstate__( # type: ignore[attr-defined] + {"key": key, "value": _unquote(value), "coded_value": value} + ) + parsed_cookies.append((key, current_morsel)) + morsel_seen = True + else: + # Invalid cookie string - no value for non-attribute + break + + return parsed_cookies diff --git a/aiohttp/_cparser.pxd b/aiohttp/_cparser.pxd new file mode 100644 index 0000000000000000000000000000000000000000..1b3be6d4efb682bca9397da34f8e727b381bc84f --- /dev/null +++ b/aiohttp/_cparser.pxd @@ -0,0 +1,158 @@ +from libc.stdint cimport int32_t, uint8_t, uint16_t, uint64_t + + +cdef extern from "llhttp.h": + + struct llhttp__internal_s: + int32_t _index + void* _span_pos0 + void* _span_cb0 + int32_t error + const char* reason + const char* error_pos + void* data + void* _current + uint64_t content_length + uint8_t type + uint8_t method + uint8_t http_major + uint8_t http_minor + uint8_t header_state + uint8_t lenient_flags + uint8_t upgrade + uint8_t finish + uint16_t flags + uint16_t status_code + void* settings + + ctypedef llhttp__internal_s llhttp__internal_t + ctypedef llhttp__internal_t llhttp_t + + ctypedef int (*llhttp_data_cb)(llhttp_t*, const char *at, size_t length) except -1 + ctypedef int (*llhttp_cb)(llhttp_t*) except -1 + + struct llhttp_settings_s: + llhttp_cb on_message_begin + llhttp_data_cb on_url + llhttp_data_cb on_status + llhttp_data_cb on_header_field + llhttp_data_cb on_header_value + llhttp_cb on_headers_complete + llhttp_data_cb on_body + llhttp_cb on_message_complete + llhttp_cb on_chunk_header + llhttp_cb on_chunk_complete + + llhttp_cb on_url_complete + llhttp_cb on_status_complete + llhttp_cb on_header_field_complete + llhttp_cb on_header_value_complete + + ctypedef llhttp_settings_s llhttp_settings_t + + enum llhttp_errno: + HPE_OK, + HPE_INTERNAL, + HPE_STRICT, + HPE_LF_EXPECTED, + HPE_UNEXPECTED_CONTENT_LENGTH, + HPE_CLOSED_CONNECTION, + HPE_INVALID_METHOD, + HPE_INVALID_URL, + HPE_INVALID_CONSTANT, + HPE_INVALID_VERSION, + HPE_INVALID_HEADER_TOKEN, + HPE_INVALID_CONTENT_LENGTH, + HPE_INVALID_CHUNK_SIZE, + HPE_INVALID_STATUS, + HPE_INVALID_EOF_STATE, + HPE_INVALID_TRANSFER_ENCODING, + HPE_CB_MESSAGE_BEGIN, + HPE_CB_HEADERS_COMPLETE, + HPE_CB_MESSAGE_COMPLETE, + HPE_CB_CHUNK_HEADER, + HPE_CB_CHUNK_COMPLETE, + HPE_PAUSED, + HPE_PAUSED_UPGRADE, + HPE_USER + + ctypedef llhttp_errno llhttp_errno_t + + enum llhttp_flags: + F_CHUNKED, + F_CONTENT_LENGTH + + enum llhttp_type: + HTTP_REQUEST, + HTTP_RESPONSE, + HTTP_BOTH + + enum llhttp_method: + HTTP_DELETE, + HTTP_GET, + HTTP_HEAD, + HTTP_POST, + HTTP_PUT, + HTTP_CONNECT, + HTTP_OPTIONS, + HTTP_TRACE, + HTTP_COPY, + HTTP_LOCK, + HTTP_MKCOL, + HTTP_MOVE, + HTTP_PROPFIND, + HTTP_PROPPATCH, + HTTP_SEARCH, + HTTP_UNLOCK, + HTTP_BIND, + HTTP_REBIND, + HTTP_UNBIND, + HTTP_ACL, + HTTP_REPORT, + HTTP_MKACTIVITY, + HTTP_CHECKOUT, + HTTP_MERGE, + HTTP_MSEARCH, + HTTP_NOTIFY, + HTTP_SUBSCRIBE, + HTTP_UNSUBSCRIBE, + HTTP_PATCH, + HTTP_PURGE, + HTTP_MKCALENDAR, + HTTP_LINK, + HTTP_UNLINK, + HTTP_SOURCE, + HTTP_PRI, + HTTP_DESCRIBE, + HTTP_ANNOUNCE, + HTTP_SETUP, + HTTP_PLAY, + HTTP_PAUSE, + HTTP_TEARDOWN, + HTTP_GET_PARAMETER, + HTTP_SET_PARAMETER, + HTTP_REDIRECT, + HTTP_RECORD, + HTTP_FLUSH + + ctypedef llhttp_method llhttp_method_t; + + void llhttp_settings_init(llhttp_settings_t* settings) + void llhttp_init(llhttp_t* parser, llhttp_type type, + const llhttp_settings_t* settings) + + llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len) + + int llhttp_should_keep_alive(const llhttp_t* parser) + + void llhttp_resume_after_upgrade(llhttp_t* parser) + + llhttp_errno_t llhttp_get_errno(const llhttp_t* parser) + const char* llhttp_get_error_reason(const llhttp_t* parser) + const char* llhttp_get_error_pos(const llhttp_t* parser) + + const char* llhttp_method_name(llhttp_method_t method) + + void llhttp_set_lenient_headers(llhttp_t* parser, int enabled) + void llhttp_set_lenient_optional_cr_before_lf(llhttp_t* parser, int enabled) + void llhttp_set_lenient_spaces_after_chunk_size(llhttp_t* parser, int enabled) diff --git a/aiohttp/abc.py b/aiohttp/abc.py new file mode 100644 index 0000000000000000000000000000000000000000..faf09575afbb2d6842ad639dada0fdebc2e28945 --- /dev/null +++ b/aiohttp/abc.py @@ -0,0 +1,268 @@ +import asyncio +import logging +import socket +from abc import ABC, abstractmethod +from collections.abc import Sized +from http.cookies import BaseCookie, Morsel +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Dict, + Generator, + Iterable, + List, + Optional, + Sequence, + Tuple, + TypedDict, + Union, +) + +from multidict import CIMultiDict +from yarl import URL + +from ._cookie_helpers import parse_set_cookie_headers +from .typedefs import LooseCookies + +if TYPE_CHECKING: + from .web_app import Application + from .web_exceptions import HTTPException + from .web_request import BaseRequest, Request + from .web_response import StreamResponse +else: + BaseRequest = Request = Application = StreamResponse = None + HTTPException = None + + +class AbstractRouter(ABC): + def __init__(self) -> None: + self._frozen = False + + def post_init(self, app: Application) -> None: + """Post init stage. + + Not an abstract method for sake of backward compatibility, + but if the router wants to be aware of the application + it can override this. + """ + + @property + def frozen(self) -> bool: + return self._frozen + + def freeze(self) -> None: + """Freeze router.""" + self._frozen = True + + @abstractmethod + async def resolve(self, request: Request) -> "AbstractMatchInfo": + """Return MATCH_INFO for given request""" + + +class AbstractMatchInfo(ABC): + + __slots__ = () + + @property # pragma: no branch + @abstractmethod + def handler(self) -> Callable[[Request], Awaitable[StreamResponse]]: + """Execute matched request handler""" + + @property + @abstractmethod + def expect_handler( + self, + ) -> Callable[[Request], Awaitable[Optional[StreamResponse]]]: + """Expect handler for 100-continue processing""" + + @property # pragma: no branch + @abstractmethod + def http_exception(self) -> Optional[HTTPException]: + """HTTPException instance raised on router's resolving, or None""" + + @abstractmethod # pragma: no branch + def get_info(self) -> Dict[str, Any]: + """Return a dict with additional info useful for introspection""" + + @property # pragma: no branch + @abstractmethod + def apps(self) -> Tuple[Application, ...]: + """Stack of nested applications. + + Top level application is left-most element. + + """ + + @abstractmethod + def add_app(self, app: Application) -> None: + """Add application to the nested apps stack.""" + + @abstractmethod + def freeze(self) -> None: + """Freeze the match info. + + The method is called after route resolution. + + After the call .add_app() is forbidden. + + """ + + +class AbstractView(ABC): + """Abstract class based view.""" + + def __init__(self, request: Request) -> None: + self._request = request + + @property + def request(self) -> Request: + """Request instance.""" + return self._request + + @abstractmethod + def __await__(self) -> Generator[None, None, StreamResponse]: + """Execute the view handler.""" + + +class ResolveResult(TypedDict): + """Resolve result. + + This is the result returned from an AbstractResolver's + resolve method. + + :param hostname: The hostname that was provided. + :param host: The IP address that was resolved. + :param port: The port that was resolved. + :param family: The address family that was resolved. + :param proto: The protocol that was resolved. + :param flags: The flags that were resolved. + """ + + hostname: str + host: str + port: int + family: int + proto: int + flags: int + + +class AbstractResolver(ABC): + """Abstract DNS resolver.""" + + @abstractmethod + async def resolve( + self, host: str, port: int = 0, family: socket.AddressFamily = socket.AF_INET + ) -> List[ResolveResult]: + """Return IP address for given hostname""" + + @abstractmethod + async def close(self) -> None: + """Release resolver""" + + +if TYPE_CHECKING: + IterableBase = Iterable[Morsel[str]] +else: + IterableBase = Iterable + + +ClearCookiePredicate = Callable[["Morsel[str]"], bool] + + +class AbstractCookieJar(Sized, IterableBase): + """Abstract Cookie Jar.""" + + def __init__(self, *, loop: Optional[asyncio.AbstractEventLoop] = None) -> None: + self._loop = loop or asyncio.get_running_loop() + + @property + @abstractmethod + def quote_cookie(self) -> bool: + """Return True if cookies should be quoted.""" + + @abstractmethod + def clear(self, predicate: Optional[ClearCookiePredicate] = None) -> None: + """Clear all cookies if no predicate is passed.""" + + @abstractmethod + def clear_domain(self, domain: str) -> None: + """Clear all cookies for domain and all subdomains.""" + + @abstractmethod + def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> None: + """Update cookies.""" + + def update_cookies_from_headers( + self, headers: Sequence[str], response_url: URL + ) -> None: + """Update cookies from raw Set-Cookie headers.""" + if headers and (cookies_to_update := parse_set_cookie_headers(headers)): + self.update_cookies(cookies_to_update, response_url) + + @abstractmethod + def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": + """Return the jar's cookies filtered by their attributes.""" + + +class AbstractStreamWriter(ABC): + """Abstract stream writer.""" + + buffer_size: int = 0 + output_size: int = 0 + length: Optional[int] = 0 + + @abstractmethod + async def write(self, chunk: Union[bytes, bytearray, memoryview]) -> None: + """Write chunk into stream.""" + + @abstractmethod + async def write_eof(self, chunk: bytes = b"") -> None: + """Write last chunk.""" + + @abstractmethod + async def drain(self) -> None: + """Flush the write buffer.""" + + @abstractmethod + def enable_compression( + self, encoding: str = "deflate", strategy: Optional[int] = None + ) -> None: + """Enable HTTP body compression""" + + @abstractmethod + def enable_chunking(self) -> None: + """Enable HTTP chunked mode""" + + @abstractmethod + async def write_headers( + self, status_line: str, headers: "CIMultiDict[str]" + ) -> None: + """Write HTTP headers""" + + def send_headers(self) -> None: + """Force sending buffered headers if not already sent. + + Required only if write_headers() buffers headers instead of sending immediately. + For backwards compatibility, this method does nothing by default. + """ + + +class AbstractAccessLogger(ABC): + """Abstract writer to access log.""" + + __slots__ = ("logger", "log_format") + + def __init__(self, logger: logging.Logger, log_format: str) -> None: + self.logger = logger + self.log_format = log_format + + @abstractmethod + def log(self, request: BaseRequest, response: StreamResponse, time: float) -> None: + """Emit log to logger.""" + + @property + def enabled(self) -> bool: + """Check if logger is enabled.""" + return True diff --git a/aiohttp/client_exceptions.py b/aiohttp/client_exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..1d298e9a8cf663cdc8a85d3b7d1f9264ff5e03c9 --- /dev/null +++ b/aiohttp/client_exceptions.py @@ -0,0 +1,421 @@ +"""HTTP related errors.""" + +import asyncio +import warnings +from typing import TYPE_CHECKING, Optional, Tuple, Union + +from multidict import MultiMapping + +from .typedefs import StrOrURL + +if TYPE_CHECKING: + import ssl + + SSLContext = ssl.SSLContext +else: + try: + import ssl + + SSLContext = ssl.SSLContext + except ImportError: # pragma: no cover + ssl = SSLContext = None # type: ignore[assignment] + +if TYPE_CHECKING: + from .client_reqrep import ClientResponse, ConnectionKey, Fingerprint, RequestInfo + from .http_parser import RawResponseMessage +else: + RequestInfo = ClientResponse = ConnectionKey = RawResponseMessage = None + +__all__ = ( + "ClientError", + "ClientConnectionError", + "ClientConnectionResetError", + "ClientOSError", + "ClientConnectorError", + "ClientProxyConnectionError", + "ClientSSLError", + "ClientConnectorDNSError", + "ClientConnectorSSLError", + "ClientConnectorCertificateError", + "ConnectionTimeoutError", + "SocketTimeoutError", + "ServerConnectionError", + "ServerTimeoutError", + "ServerDisconnectedError", + "ServerFingerprintMismatch", + "ClientResponseError", + "ClientHttpProxyError", + "WSServerHandshakeError", + "ContentTypeError", + "ClientPayloadError", + "InvalidURL", + "InvalidUrlClientError", + "RedirectClientError", + "NonHttpUrlClientError", + "InvalidUrlRedirectClientError", + "NonHttpUrlRedirectClientError", + "WSMessageTypeError", +) + + +class ClientError(Exception): + """Base class for client connection errors.""" + + +class ClientResponseError(ClientError): + """Base class for exceptions that occur after getting a response. + + request_info: An instance of RequestInfo. + history: A sequence of responses, if redirects occurred. + status: HTTP status code. + message: Error message. + headers: Response headers. + """ + + def __init__( + self, + request_info: RequestInfo, + history: Tuple[ClientResponse, ...], + *, + code: Optional[int] = None, + status: Optional[int] = None, + message: str = "", + headers: Optional[MultiMapping[str]] = None, + ) -> None: + self.request_info = request_info + if code is not None: + if status is not None: + raise ValueError( + "Both code and status arguments are provided; " + "code is deprecated, use status instead" + ) + warnings.warn( + "code argument is deprecated, use status instead", + DeprecationWarning, + stacklevel=2, + ) + if status is not None: + self.status = status + elif code is not None: + self.status = code + else: + self.status = 0 + self.message = message + self.headers = headers + self.history = history + self.args = (request_info, history) + + def __str__(self) -> str: + return "{}, message={!r}, url={!r}".format( + self.status, + self.message, + str(self.request_info.real_url), + ) + + def __repr__(self) -> str: + args = f"{self.request_info!r}, {self.history!r}" + if self.status != 0: + args += f", status={self.status!r}" + if self.message != "": + args += f", message={self.message!r}" + if self.headers is not None: + args += f", headers={self.headers!r}" + return f"{type(self).__name__}({args})" + + @property + def code(self) -> int: + warnings.warn( + "code property is deprecated, use status instead", + DeprecationWarning, + stacklevel=2, + ) + return self.status + + @code.setter + def code(self, value: int) -> None: + warnings.warn( + "code property is deprecated, use status instead", + DeprecationWarning, + stacklevel=2, + ) + self.status = value + + +class ContentTypeError(ClientResponseError): + """ContentType found is not valid.""" + + +class WSServerHandshakeError(ClientResponseError): + """websocket server handshake error.""" + + +class ClientHttpProxyError(ClientResponseError): + """HTTP proxy error. + + Raised in :class:`aiohttp.connector.TCPConnector` if + proxy responds with status other than ``200 OK`` + on ``CONNECT`` request. + """ + + +class TooManyRedirects(ClientResponseError): + """Client was redirected too many times.""" + + +class ClientConnectionError(ClientError): + """Base class for client socket errors.""" + + +class ClientConnectionResetError(ClientConnectionError, ConnectionResetError): + """ConnectionResetError""" + + +class ClientOSError(ClientConnectionError, OSError): + """OSError error.""" + + +class ClientConnectorError(ClientOSError): + """Client connector error. + + Raised in :class:`aiohttp.connector.TCPConnector` if + a connection can not be established. + """ + + def __init__(self, connection_key: ConnectionKey, os_error: OSError) -> None: + self._conn_key = connection_key + self._os_error = os_error + super().__init__(os_error.errno, os_error.strerror) + self.args = (connection_key, os_error) + + @property + def os_error(self) -> OSError: + return self._os_error + + @property + def host(self) -> str: + return self._conn_key.host + + @property + def port(self) -> Optional[int]: + return self._conn_key.port + + @property + def ssl(self) -> Union[SSLContext, bool, "Fingerprint"]: + return self._conn_key.ssl + + def __str__(self) -> str: + return "Cannot connect to host {0.host}:{0.port} ssl:{1} [{2}]".format( + self, "default" if self.ssl is True else self.ssl, self.strerror + ) + + # OSError.__reduce__ does too much black magick + __reduce__ = BaseException.__reduce__ + + +class ClientConnectorDNSError(ClientConnectorError): + """DNS resolution failed during client connection. + + Raised in :class:`aiohttp.connector.TCPConnector` if + DNS resolution fails. + """ + + +class ClientProxyConnectionError(ClientConnectorError): + """Proxy connection error. + + Raised in :class:`aiohttp.connector.TCPConnector` if + connection to proxy can not be established. + """ + + +class UnixClientConnectorError(ClientConnectorError): + """Unix connector error. + + Raised in :py:class:`aiohttp.connector.UnixConnector` + if connection to unix socket can not be established. + """ + + def __init__( + self, path: str, connection_key: ConnectionKey, os_error: OSError + ) -> None: + self._path = path + super().__init__(connection_key, os_error) + + @property + def path(self) -> str: + return self._path + + def __str__(self) -> str: + return "Cannot connect to unix socket {0.path} ssl:{1} [{2}]".format( + self, "default" if self.ssl is True else self.ssl, self.strerror + ) + + +class ServerConnectionError(ClientConnectionError): + """Server connection errors.""" + + +class ServerDisconnectedError(ServerConnectionError): + """Server disconnected.""" + + def __init__(self, message: Union[RawResponseMessage, str, None] = None) -> None: + if message is None: + message = "Server disconnected" + + self.args = (message,) + self.message = message + + +class ServerTimeoutError(ServerConnectionError, asyncio.TimeoutError): + """Server timeout error.""" + + +class ConnectionTimeoutError(ServerTimeoutError): + """Connection timeout error.""" + + +class SocketTimeoutError(ServerTimeoutError): + """Socket timeout error.""" + + +class ServerFingerprintMismatch(ServerConnectionError): + """SSL certificate does not match expected fingerprint.""" + + def __init__(self, expected: bytes, got: bytes, host: str, port: int) -> None: + self.expected = expected + self.got = got + self.host = host + self.port = port + self.args = (expected, got, host, port) + + def __repr__(self) -> str: + return "<{} expected={!r} got={!r} host={!r} port={!r}>".format( + self.__class__.__name__, self.expected, self.got, self.host, self.port + ) + + +class ClientPayloadError(ClientError): + """Response payload error.""" + + +class InvalidURL(ClientError, ValueError): + """Invalid URL. + + URL used for fetching is malformed, e.g. it doesn't contains host + part. + """ + + # Derive from ValueError for backward compatibility + + def __init__(self, url: StrOrURL, description: Union[str, None] = None) -> None: + # The type of url is not yarl.URL because the exception can be raised + # on URL(url) call + self._url = url + self._description = description + + if description: + super().__init__(url, description) + else: + super().__init__(url) + + @property + def url(self) -> StrOrURL: + return self._url + + @property + def description(self) -> "str | None": + return self._description + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} {self}>" + + def __str__(self) -> str: + if self._description: + return f"{self._url} - {self._description}" + return str(self._url) + + +class InvalidUrlClientError(InvalidURL): + """Invalid URL client error.""" + + +class RedirectClientError(ClientError): + """Client redirect error.""" + + +class NonHttpUrlClientError(ClientError): + """Non http URL client error.""" + + +class InvalidUrlRedirectClientError(InvalidUrlClientError, RedirectClientError): + """Invalid URL redirect client error.""" + + +class NonHttpUrlRedirectClientError(NonHttpUrlClientError, RedirectClientError): + """Non http URL redirect client error.""" + + +class ClientSSLError(ClientConnectorError): + """Base error for ssl.*Errors.""" + + +if ssl is not None: + cert_errors = (ssl.CertificateError,) + cert_errors_bases = ( + ClientSSLError, + ssl.CertificateError, + ) + + ssl_errors = (ssl.SSLError,) + ssl_error_bases = (ClientSSLError, ssl.SSLError) +else: # pragma: no cover + cert_errors = tuple() + cert_errors_bases = ( + ClientSSLError, + ValueError, + ) + + ssl_errors = tuple() + ssl_error_bases = (ClientSSLError,) + + +class ClientConnectorSSLError(*ssl_error_bases): # type: ignore[misc] + """Response ssl error.""" + + +class ClientConnectorCertificateError(*cert_errors_bases): # type: ignore[misc] + """Response certificate error.""" + + def __init__( + self, connection_key: ConnectionKey, certificate_error: Exception + ) -> None: + self._conn_key = connection_key + self._certificate_error = certificate_error + self.args = (connection_key, certificate_error) + + @property + def certificate_error(self) -> Exception: + return self._certificate_error + + @property + def host(self) -> str: + return self._conn_key.host + + @property + def port(self) -> Optional[int]: + return self._conn_key.port + + @property + def ssl(self) -> bool: + return self._conn_key.is_ssl + + def __str__(self) -> str: + return ( + "Cannot connect to host {0.host}:{0.port} ssl:{0.ssl} " + "[{0.certificate_error.__class__.__name__}: " + "{0.certificate_error.args}]".format(self) + ) + + +class WSMessageTypeError(TypeError): + """WebSocket message type is not valid.""" diff --git a/aiohttp/client_middleware_digest_auth.py b/aiohttp/client_middleware_digest_auth.py new file mode 100644 index 0000000000000000000000000000000000000000..5aab5acb85acbda5136fe0bef84df7ff4f492405 --- /dev/null +++ b/aiohttp/client_middleware_digest_auth.py @@ -0,0 +1,480 @@ +""" +Digest authentication middleware for aiohttp client. + +This middleware implements HTTP Digest Authentication according to RFC 7616, +providing a more secure alternative to Basic Authentication. It supports all +standard hash algorithms including MD5, SHA, SHA-256, SHA-512 and their session +variants, as well as both 'auth' and 'auth-int' quality of protection (qop) options. +""" + +import hashlib +import os +import re +import sys +import time +from typing import ( + Callable, + Dict, + Final, + FrozenSet, + List, + Literal, + Tuple, + TypedDict, + Union, +) + +from yarl import URL + +from . import hdrs +from .client_exceptions import ClientError +from .client_middlewares import ClientHandlerType +from .client_reqrep import ClientRequest, ClientResponse +from .payload import Payload + + +class DigestAuthChallenge(TypedDict, total=False): + realm: str + nonce: str + qop: str + algorithm: str + opaque: str + domain: str + stale: str + + +DigestFunctions: Dict[str, Callable[[bytes], "hashlib._Hash"]] = { + "MD5": hashlib.md5, + "MD5-SESS": hashlib.md5, + "SHA": hashlib.sha1, + "SHA-SESS": hashlib.sha1, + "SHA256": hashlib.sha256, + "SHA256-SESS": hashlib.sha256, + "SHA-256": hashlib.sha256, + "SHA-256-SESS": hashlib.sha256, + "SHA512": hashlib.sha512, + "SHA512-SESS": hashlib.sha512, + "SHA-512": hashlib.sha512, + "SHA-512-SESS": hashlib.sha512, +} + + +# Compile the regex pattern once at module level for performance +_HEADER_PAIRS_PATTERN = re.compile( + r'(?:^|\s|,\s*)(\w+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^\s,]+))' + if sys.version_info < (3, 11) + else r'(?:^|\s|,\s*)((?>\w+))\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^\s,]+))' + # +------------|--------|--|-|-|--|----|------|----|--||-----|-> Match valid start/sep + # +--------|--|-|-|--|----|------|----|--||-----|-> alphanumeric key (atomic + # | | | | | | | | || | group reduces backtracking) + # +--|-|-|--|----|------|----|--||-----|-> maybe whitespace + # | | | | | | | || | + # +-|-|--|----|------|----|--||-----|-> = (delimiter) + # +-|--|----|------|----|--||-----|-> maybe whitespace + # | | | | | || | + # +--|----|------|----|--||-----|-> group quoted or unquoted + # | | | | || | + # +----|------|----|--||-----|-> if quoted... + # +------|----|--||-----|-> anything but " or \ + # +----|--||-----|-> escaped characters allowed + # +--||-----|-> or can be empty string + # || | + # +|-----|-> if unquoted... + # +-----|-> anything but , or + # +-> at least one char req'd +) + + +# RFC 7616: Challenge parameters to extract +CHALLENGE_FIELDS: Final[ + Tuple[ + Literal["realm", "nonce", "qop", "algorithm", "opaque", "domain", "stale"], ... + ] +] = ( + "realm", + "nonce", + "qop", + "algorithm", + "opaque", + "domain", + "stale", +) + +# Supported digest authentication algorithms +# Use a tuple of sorted keys for predictable documentation and error messages +SUPPORTED_ALGORITHMS: Final[Tuple[str, ...]] = tuple(sorted(DigestFunctions.keys())) + +# RFC 7616: Fields that require quoting in the Digest auth header +# These fields must be enclosed in double quotes in the Authorization header. +# Algorithm, qop, and nc are never quoted per RFC specifications. +# This frozen set is used by the template-based header construction to +# automatically determine which fields need quotes. +QUOTED_AUTH_FIELDS: Final[FrozenSet[str]] = frozenset( + {"username", "realm", "nonce", "uri", "response", "opaque", "cnonce"} +) + + +def escape_quotes(value: str) -> str: + """Escape double quotes for HTTP header values.""" + return value.replace('"', '\\"') + + +def unescape_quotes(value: str) -> str: + """Unescape double quotes in HTTP header values.""" + return value.replace('\\"', '"') + + +def parse_header_pairs(header: str) -> Dict[str, str]: + """ + Parse key-value pairs from WWW-Authenticate or similar HTTP headers. + + This function handles the complex format of WWW-Authenticate header values, + supporting both quoted and unquoted values, proper handling of commas in + quoted values, and whitespace variations per RFC 7616. + + Examples of supported formats: + - key1="value1", key2=value2 + - key1 = "value1" , key2="value, with, commas" + - key1=value1,key2="value2" + - realm="example.com", nonce="12345", qop="auth" + + Args: + header: The header value string to parse + + Returns: + Dictionary mapping parameter names to their values + """ + return { + stripped_key: unescape_quotes(quoted_val) if quoted_val else unquoted_val + for key, quoted_val, unquoted_val in _HEADER_PAIRS_PATTERN.findall(header) + if (stripped_key := key.strip()) + } + + +class DigestAuthMiddleware: + """ + HTTP digest authentication middleware for aiohttp client. + + This middleware intercepts 401 Unauthorized responses containing a Digest + authentication challenge, calculates the appropriate digest credentials, + and automatically retries the request with the proper Authorization header. + + Features: + - Handles all aspects of Digest authentication handshake automatically + - Supports all standard hash algorithms: + - MD5, MD5-SESS + - SHA, SHA-SESS + - SHA256, SHA256-SESS, SHA-256, SHA-256-SESS + - SHA512, SHA512-SESS, SHA-512, SHA-512-SESS + - Supports 'auth' and 'auth-int' quality of protection modes + - Properly handles quoted strings and parameter parsing + - Includes replay attack protection with client nonce count tracking + - Supports preemptive authentication per RFC 7616 Section 3.6 + + Standards compliance: + - RFC 7616: HTTP Digest Access Authentication (primary reference) + - RFC 2617: HTTP Authentication (deprecated by RFC 7616) + - RFC 1945: Section 11.1 (username restrictions) + + Implementation notes: + The core digest calculation is inspired by the implementation in + https://github.com/requests/requests/blob/v2.18.4/requests/auth.py + with added support for modern digest auth features and error handling. + """ + + def __init__( + self, + login: str, + password: str, + preemptive: bool = True, + ) -> None: + if login is None: + raise ValueError("None is not allowed as login value") + + if password is None: + raise ValueError("None is not allowed as password value") + + if ":" in login: + raise ValueError('A ":" is not allowed in username (RFC 1945#section-11.1)') + + self._login_str: Final[str] = login + self._login_bytes: Final[bytes] = login.encode("utf-8") + self._password_bytes: Final[bytes] = password.encode("utf-8") + + self._last_nonce_bytes = b"" + self._nonce_count = 0 + self._challenge: DigestAuthChallenge = {} + self._preemptive: bool = preemptive + # Set of URLs defining the protection space + self._protection_space: List[str] = [] + + async def _encode( + self, method: str, url: URL, body: Union[Payload, Literal[b""]] + ) -> str: + """ + Build digest authorization header for the current challenge. + + Args: + method: The HTTP method (GET, POST, etc.) + url: The request URL + body: The request body (used for qop=auth-int) + + Returns: + A fully formatted Digest authorization header string + + Raises: + ClientError: If the challenge is missing required parameters or + contains unsupported values + + """ + challenge = self._challenge + if "realm" not in challenge: + raise ClientError( + "Malformed Digest auth challenge: Missing 'realm' parameter" + ) + + if "nonce" not in challenge: + raise ClientError( + "Malformed Digest auth challenge: Missing 'nonce' parameter" + ) + + # Empty realm values are allowed per RFC 7616 (SHOULD, not MUST, contain host name) + realm = challenge["realm"] + nonce = challenge["nonce"] + + # Empty nonce values are not allowed as they are security-critical for replay protection + if not nonce: + raise ClientError( + "Security issue: Digest auth challenge contains empty 'nonce' value" + ) + + qop_raw = challenge.get("qop", "") + # Preserve original algorithm case for response while using uppercase for processing + algorithm_original = challenge.get("algorithm", "MD5") + algorithm = algorithm_original.upper() + opaque = challenge.get("opaque", "") + + # Convert string values to bytes once + nonce_bytes = nonce.encode("utf-8") + realm_bytes = realm.encode("utf-8") + path = URL(url).path_qs + + # Process QoP + qop = "" + qop_bytes = b"" + if qop_raw: + valid_qops = {"auth", "auth-int"}.intersection( + {q.strip() for q in qop_raw.split(",") if q.strip()} + ) + if not valid_qops: + raise ClientError( + f"Digest auth error: Unsupported Quality of Protection (qop) value(s): {qop_raw}" + ) + + qop = "auth-int" if "auth-int" in valid_qops else "auth" + qop_bytes = qop.encode("utf-8") + + if algorithm not in DigestFunctions: + raise ClientError( + f"Digest auth error: Unsupported hash algorithm: {algorithm}. " + f"Supported algorithms: {', '.join(SUPPORTED_ALGORITHMS)}" + ) + hash_fn: Final = DigestFunctions[algorithm] + + def H(x: bytes) -> bytes: + """RFC 7616 Section 3: Hash function H(data) = hex(hash(data)).""" + return hash_fn(x).hexdigest().encode() + + def KD(s: bytes, d: bytes) -> bytes: + """RFC 7616 Section 3: KD(secret, data) = H(concat(secret, ":", data)).""" + return H(b":".join((s, d))) + + # Calculate A1 and A2 + A1 = b":".join((self._login_bytes, realm_bytes, self._password_bytes)) + A2 = f"{method.upper()}:{path}".encode() + if qop == "auth-int": + if isinstance(body, Payload): # will always be empty bytes unless Payload + entity_bytes = await body.as_bytes() # Get bytes from Payload + else: + entity_bytes = body + entity_hash = H(entity_bytes) + A2 = b":".join((A2, entity_hash)) + + HA1 = H(A1) + HA2 = H(A2) + + # Nonce count handling + if nonce_bytes == self._last_nonce_bytes: + self._nonce_count += 1 + else: + self._nonce_count = 1 + + self._last_nonce_bytes = nonce_bytes + ncvalue = f"{self._nonce_count:08x}" + ncvalue_bytes = ncvalue.encode("utf-8") + + # Generate client nonce + cnonce = hashlib.sha1( + b"".join( + [ + str(self._nonce_count).encode("utf-8"), + nonce_bytes, + time.ctime().encode("utf-8"), + os.urandom(8), + ] + ) + ).hexdigest()[:16] + cnonce_bytes = cnonce.encode("utf-8") + + # Special handling for session-based algorithms + if algorithm.upper().endswith("-SESS"): + HA1 = H(b":".join((HA1, nonce_bytes, cnonce_bytes))) + + # Calculate the response digest + if qop: + noncebit = b":".join( + (nonce_bytes, ncvalue_bytes, cnonce_bytes, qop_bytes, HA2) + ) + response_digest = KD(HA1, noncebit) + else: + response_digest = KD(HA1, b":".join((nonce_bytes, HA2))) + + # Define a dict mapping of header fields to their values + # Group fields into always-present, optional, and qop-dependent + header_fields = { + # Always present fields + "username": escape_quotes(self._login_str), + "realm": escape_quotes(realm), + "nonce": escape_quotes(nonce), + "uri": path, + "response": response_digest.decode(), + "algorithm": algorithm_original, + } + + # Optional fields + if opaque: + header_fields["opaque"] = escape_quotes(opaque) + + # QoP-dependent fields + if qop: + header_fields["qop"] = qop + header_fields["nc"] = ncvalue + header_fields["cnonce"] = cnonce + + # Build header using templates for each field type + pairs: List[str] = [] + for field, value in header_fields.items(): + if field in QUOTED_AUTH_FIELDS: + pairs.append(f'{field}="{value}"') + else: + pairs.append(f"{field}={value}") + + return f"Digest {', '.join(pairs)}" + + def _in_protection_space(self, url: URL) -> bool: + """ + Check if the given URL is within the current protection space. + + According to RFC 7616, a URI is in the protection space if any URI + in the protection space is a prefix of it (after both have been made absolute). + """ + request_str = str(url) + for space_str in self._protection_space: + # Check if request starts with space URL + if not request_str.startswith(space_str): + continue + # Exact match or space ends with / (proper directory prefix) + if len(request_str) == len(space_str) or space_str[-1] == "/": + return True + # Check next char is / to ensure proper path boundary + if request_str[len(space_str)] == "/": + return True + return False + + def _authenticate(self, response: ClientResponse) -> bool: + """ + Takes the given response and tries digest-auth, if needed. + + Returns true if the original request must be resent. + """ + if response.status != 401: + return False + + auth_header = response.headers.get("www-authenticate", "") + if not auth_header: + return False # No authentication header present + + method, sep, headers = auth_header.partition(" ") + if not sep: + # No space found in www-authenticate header + return False # Malformed auth header, missing scheme separator + + if method.lower() != "digest": + # Not a digest auth challenge (could be Basic, Bearer, etc.) + return False + + if not headers: + # We have a digest scheme but no parameters + return False # Malformed digest header, missing parameters + + # We have a digest auth header with content + if not (header_pairs := parse_header_pairs(headers)): + # Failed to parse any key-value pairs + return False # Malformed digest header, no valid parameters + + # Extract challenge parameters + self._challenge = {} + for field in CHALLENGE_FIELDS: + if value := header_pairs.get(field): + self._challenge[field] = value + + # Update protection space based on domain parameter or default to origin + origin = response.url.origin() + + if domain := self._challenge.get("domain"): + # Parse space-separated list of URIs + self._protection_space = [] + for uri in domain.split(): + # Remove quotes if present + uri = uri.strip('"') + if uri.startswith("/"): + # Path-absolute, relative to origin + self._protection_space.append(str(origin.join(URL(uri)))) + else: + # Absolute URI + self._protection_space.append(str(URL(uri))) + else: + # No domain specified, protection space is entire origin + self._protection_space = [str(origin)] + + # Return True only if we found at least one challenge parameter + return bool(self._challenge) + + async def __call__( + self, request: ClientRequest, handler: ClientHandlerType + ) -> ClientResponse: + """Run the digest auth middleware.""" + response = None + for retry_count in range(2): + # Apply authorization header if: + # 1. This is a retry after 401 (retry_count > 0), OR + # 2. Preemptive auth is enabled AND we have a challenge AND the URL is in protection space + if retry_count > 0 or ( + self._preemptive + and self._challenge + and self._in_protection_space(request.url) + ): + request.headers[hdrs.AUTHORIZATION] = await self._encode( + request.method, request.url, request.body + ) + + # Send the request + response = await handler(request) + + # Check if we need to authenticate + if not self._authenticate(response): + break + + # At this point, response is guaranteed to be defined + assert response is not None + return response diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py new file mode 100644 index 0000000000000000000000000000000000000000..a9e0795893d6a56595f6f7b04b62cc4486d48a0f --- /dev/null +++ b/aiohttp/client_reqrep.py @@ -0,0 +1,1536 @@ +import asyncio +import codecs +import contextlib +import functools +import io +import re +import sys +import traceback +import warnings +from collections.abc import Mapping +from hashlib import md5, sha1, sha256 +from http.cookies import Morsel, SimpleCookie +from types import MappingProxyType, TracebackType +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Literal, + NamedTuple, + Optional, + Tuple, + Type, + Union, +) + +import attr +from multidict import CIMultiDict, CIMultiDictProxy, MultiDict, MultiDictProxy +from yarl import URL + +from . import hdrs, helpers, http, multipart, payload +from ._cookie_helpers import ( + parse_cookie_header, + parse_set_cookie_headers, + preserve_morsel_with_coded_value, +) +from .abc import AbstractStreamWriter +from .client_exceptions import ( + ClientConnectionError, + ClientOSError, + ClientResponseError, + ContentTypeError, + InvalidURL, + ServerFingerprintMismatch, +) +from .compression_utils import HAS_BROTLI, HAS_ZSTD +from .formdata import FormData +from .helpers import ( + _SENTINEL, + BaseTimerContext, + BasicAuth, + HeadersMixin, + TimerNoop, + noop, + reify, + sentinel, + set_exception, + set_result, +) +from .http import ( + SERVER_SOFTWARE, + HttpVersion, + HttpVersion10, + HttpVersion11, + StreamWriter, +) +from .streams import StreamReader +from .typedefs import ( + DEFAULT_JSON_DECODER, + JSONDecoder, + LooseCookies, + LooseHeaders, + Query, + RawHeaders, +) + +if TYPE_CHECKING: + import ssl + from ssl import SSLContext +else: + try: + import ssl + from ssl import SSLContext + except ImportError: # pragma: no cover + ssl = None # type: ignore[assignment] + SSLContext = object # type: ignore[misc,assignment] + + +__all__ = ("ClientRequest", "ClientResponse", "RequestInfo", "Fingerprint") + + +if TYPE_CHECKING: + from .client import ClientSession + from .connector import Connection + from .tracing import Trace + + +_CONNECTION_CLOSED_EXCEPTION = ClientConnectionError("Connection closed") +_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") +json_re = re.compile(r"^application/(?:[\w.+-]+?\+)?json") + + +def _gen_default_accept_encoding() -> str: + encodings = [ + "gzip", + "deflate", + ] + if HAS_BROTLI: + encodings.append("br") + if HAS_ZSTD: + encodings.append("zstd") + return ", ".join(encodings) + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class ContentDisposition: + type: Optional[str] + parameters: "MappingProxyType[str, str]" + filename: Optional[str] + + +class _RequestInfo(NamedTuple): + url: URL + method: str + headers: "CIMultiDictProxy[str]" + real_url: URL + + +class RequestInfo(_RequestInfo): + + def __new__( + cls, + url: URL, + method: str, + headers: "CIMultiDictProxy[str]", + real_url: Union[URL, _SENTINEL] = sentinel, + ) -> "RequestInfo": + """Create a new RequestInfo instance. + + For backwards compatibility, the real_url parameter is optional. + """ + return tuple.__new__( + cls, (url, method, headers, url if real_url is sentinel else real_url) + ) + + +class Fingerprint: + HASHFUNC_BY_DIGESTLEN = { + 16: md5, + 20: sha1, + 32: sha256, + } + + def __init__(self, fingerprint: bytes) -> None: + digestlen = len(fingerprint) + hashfunc = self.HASHFUNC_BY_DIGESTLEN.get(digestlen) + if not hashfunc: + raise ValueError("fingerprint has invalid length") + elif hashfunc is md5 or hashfunc is sha1: + raise ValueError("md5 and sha1 are insecure and not supported. Use sha256.") + self._hashfunc = hashfunc + self._fingerprint = fingerprint + + @property + def fingerprint(self) -> bytes: + return self._fingerprint + + def check(self, transport: asyncio.Transport) -> None: + if not transport.get_extra_info("sslcontext"): + return + sslobj = transport.get_extra_info("ssl_object") + cert = sslobj.getpeercert(binary_form=True) + got = self._hashfunc(cert).digest() + if got != self._fingerprint: + host, port, *_ = transport.get_extra_info("peername") + raise ServerFingerprintMismatch(self._fingerprint, got, host, port) + + +if ssl is not None: + SSL_ALLOWED_TYPES = (ssl.SSLContext, bool, Fingerprint, type(None)) +else: # pragma: no cover + SSL_ALLOWED_TYPES = (bool, type(None)) + + +def _merge_ssl_params( + ssl: Union["SSLContext", bool, Fingerprint], + verify_ssl: Optional[bool], + ssl_context: Optional["SSLContext"], + fingerprint: Optional[bytes], +) -> Union["SSLContext", bool, Fingerprint]: + if ssl is None: + ssl = True # Double check for backwards compatibility + if verify_ssl is not None and not verify_ssl: + warnings.warn( + "verify_ssl is deprecated, use ssl=False instead", + DeprecationWarning, + stacklevel=3, + ) + if ssl is not True: + raise ValueError( + "verify_ssl, ssl_context, fingerprint and ssl " + "parameters are mutually exclusive" + ) + else: + ssl = False + if ssl_context is not None: + warnings.warn( + "ssl_context is deprecated, use ssl=context instead", + DeprecationWarning, + stacklevel=3, + ) + if ssl is not True: + raise ValueError( + "verify_ssl, ssl_context, fingerprint and ssl " + "parameters are mutually exclusive" + ) + else: + ssl = ssl_context + if fingerprint is not None: + warnings.warn( + "fingerprint is deprecated, use ssl=Fingerprint(fingerprint) instead", + DeprecationWarning, + stacklevel=3, + ) + if ssl is not True: + raise ValueError( + "verify_ssl, ssl_context, fingerprint and ssl " + "parameters are mutually exclusive" + ) + else: + ssl = Fingerprint(fingerprint) + if not isinstance(ssl, SSL_ALLOWED_TYPES): + raise TypeError( + "ssl should be SSLContext, bool, Fingerprint or None, " + "got {!r} instead.".format(ssl) + ) + return ssl + + +_SSL_SCHEMES = frozenset(("https", "wss")) + + +# ConnectionKey is a NamedTuple because it is used as a key in a dict +# and a set in the connector. Since a NamedTuple is a tuple it uses +# the fast native tuple __hash__ and __eq__ implementation in CPython. +class ConnectionKey(NamedTuple): + # the key should contain an information about used proxy / TLS + # to prevent reusing wrong connections from a pool + host: str + port: Optional[int] + is_ssl: bool + ssl: Union[SSLContext, bool, Fingerprint] + proxy: Optional[URL] + proxy_auth: Optional[BasicAuth] + proxy_headers_hash: Optional[int] # hash(CIMultiDict) + + +def _is_expected_content_type( + response_content_type: str, expected_content_type: str +) -> bool: + if expected_content_type == "application/json": + return json_re.match(response_content_type) is not None + return expected_content_type in response_content_type + + +def _warn_if_unclosed_payload(payload: payload.Payload, stacklevel: int = 2) -> None: + """Warn if the payload is not closed. + + Callers must check that the body is a Payload before calling this method. + + Args: + payload: The payload to check + stacklevel: Stack level for the warning (default 2 for direct callers) + """ + if not payload.autoclose and not payload.consumed: + warnings.warn( + "The previous request body contains unclosed resources. " + "Use await request.update_body() instead of setting request.body " + "directly to properly close resources and avoid leaks.", + ResourceWarning, + stacklevel=stacklevel, + ) + + +class ClientResponse(HeadersMixin): + + # Some of these attributes are None when created, + # but will be set by the start() method. + # As the end user will likely never see the None values, we cheat the types below. + # from the Status-Line of the response + version: Optional[HttpVersion] = None # HTTP-Version + status: int = None # type: ignore[assignment] # Status-Code + reason: Optional[str] = None # Reason-Phrase + + content: StreamReader = None # type: ignore[assignment] # Payload stream + _body: Optional[bytes] = None + _headers: CIMultiDictProxy[str] = None # type: ignore[assignment] + _history: Tuple["ClientResponse", ...] = () + _raw_headers: RawHeaders = None # type: ignore[assignment] + + _connection: Optional["Connection"] = None # current connection + _cookies: Optional[SimpleCookie] = None + _raw_cookie_headers: Optional[Tuple[str, ...]] = None + _continue: Optional["asyncio.Future[bool]"] = None + _source_traceback: Optional[traceback.StackSummary] = None + _session: Optional["ClientSession"] = None + # set up by ClientRequest after ClientResponse object creation + # post-init stage allows to not change ctor signature + _closed = True # to allow __del__ for non-initialized properly response + _released = False + _in_context = False + + _resolve_charset: Callable[["ClientResponse", bytes], str] = lambda *_: "utf-8" + + __writer: Optional["asyncio.Task[None]"] = None + + def __init__( + self, + method: str, + url: URL, + *, + writer: "Optional[asyncio.Task[None]]", + continue100: Optional["asyncio.Future[bool]"], + timer: BaseTimerContext, + request_info: RequestInfo, + traces: List["Trace"], + loop: asyncio.AbstractEventLoop, + session: "ClientSession", + ) -> None: + # URL forbids subclasses, so a simple type check is enough. + assert type(url) is URL + + self.method = method + + self._real_url = url + self._url = url.with_fragment(None) if url.raw_fragment else url + if writer is not None: + self._writer = writer + if continue100 is not None: + self._continue = continue100 + self._request_info = request_info + self._timer = timer if timer is not None else TimerNoop() + self._cache: Dict[str, Any] = {} + self._traces = traces + self._loop = loop + # Save reference to _resolve_charset, so that get_encoding() will still + # work after the response has finished reading the body. + # TODO: Fix session=None in tests (see ClientRequest.__init__). + if session is not None: + # store a reference to session #1985 + self._session = session + self._resolve_charset = session._resolve_charset + if loop.get_debug(): + self._source_traceback = traceback.extract_stack(sys._getframe(1)) + + def __reset_writer(self, _: object = None) -> None: + self.__writer = None + + @property + def _writer(self) -> Optional["asyncio.Task[None]"]: + """The writer task for streaming data. + + _writer is only provided for backwards compatibility + for subclasses that may need to access it. + """ + return self.__writer + + @_writer.setter + def _writer(self, writer: Optional["asyncio.Task[None]"]) -> None: + """Set the writer task for streaming data.""" + if self.__writer is not None: + self.__writer.remove_done_callback(self.__reset_writer) + self.__writer = writer + if writer is None: + return + if writer.done(): + # The writer is already done, so we can clear it immediately. + self.__writer = None + else: + writer.add_done_callback(self.__reset_writer) + + @property + def cookies(self) -> SimpleCookie: + if self._cookies is None: + if self._raw_cookie_headers is not None: + # Parse cookies for response.cookies (SimpleCookie for backward compatibility) + cookies = SimpleCookie() + # Use parse_set_cookie_headers for more lenient parsing that handles + # malformed cookies better than SimpleCookie.load + cookies.update(parse_set_cookie_headers(self._raw_cookie_headers)) + self._cookies = cookies + else: + self._cookies = SimpleCookie() + return self._cookies + + @cookies.setter + def cookies(self, cookies: SimpleCookie) -> None: + self._cookies = cookies + # Generate raw cookie headers from the SimpleCookie + if cookies: + self._raw_cookie_headers = tuple( + morsel.OutputString() for morsel in cookies.values() + ) + else: + self._raw_cookie_headers = None + + @reify + def url(self) -> URL: + return self._url + + @reify + def url_obj(self) -> URL: + warnings.warn("Deprecated, use .url #1654", DeprecationWarning, stacklevel=2) + return self._url + + @reify + def real_url(self) -> URL: + return self._real_url + + @reify + def host(self) -> str: + assert self._url.host is not None + return self._url.host + + @reify + def headers(self) -> "CIMultiDictProxy[str]": + return self._headers + + @reify + def raw_headers(self) -> RawHeaders: + return self._raw_headers + + @reify + def request_info(self) -> RequestInfo: + return self._request_info + + @reify + def content_disposition(self) -> Optional[ContentDisposition]: + raw = self._headers.get(hdrs.CONTENT_DISPOSITION) + if raw is None: + return None + disposition_type, params_dct = multipart.parse_content_disposition(raw) + params = MappingProxyType(params_dct) + filename = multipart.content_disposition_filename(params) + return ContentDisposition(disposition_type, params, filename) + + def __del__(self, _warnings: Any = warnings) -> None: + if self._closed: + return + + if self._connection is not None: + self._connection.release() + self._cleanup_writer() + + if self._loop.get_debug(): + kwargs = {"source": self} + _warnings.warn(f"Unclosed response {self!r}", ResourceWarning, **kwargs) + context = {"client_response": self, "message": "Unclosed response"} + if self._source_traceback: + context["source_traceback"] = self._source_traceback + self._loop.call_exception_handler(context) + + def __repr__(self) -> str: + out = io.StringIO() + ascii_encodable_url = str(self.url) + if self.reason: + ascii_encodable_reason = self.reason.encode( + "ascii", "backslashreplace" + ).decode("ascii") + else: + ascii_encodable_reason = "None" + print( + "".format( + ascii_encodable_url, self.status, ascii_encodable_reason + ), + file=out, + ) + print(self.headers, file=out) + return out.getvalue() + + @property + def connection(self) -> Optional["Connection"]: + return self._connection + + @reify + def history(self) -> Tuple["ClientResponse", ...]: + """A sequence of of responses, if redirects occurred.""" + return self._history + + @reify + def links(self) -> "MultiDictProxy[MultiDictProxy[Union[str, URL]]]": + links_str = ", ".join(self.headers.getall("link", [])) + + if not links_str: + return MultiDictProxy(MultiDict()) + + links: MultiDict[MultiDictProxy[Union[str, URL]]] = MultiDict() + + for val in re.split(r",(?=\s*<)", links_str): + match = re.match(r"\s*<(.*)>(.*)", val) + if match is None: # pragma: no cover + # the check exists to suppress mypy error + continue + url, params_str = match.groups() + params = params_str.split(";")[1:] + + link: MultiDict[Union[str, URL]] = MultiDict() + + for param in params: + match = re.match(r"^\s*(\S*)\s*=\s*(['\"]?)(.*?)(\2)\s*$", param, re.M) + if match is None: # pragma: no cover + # the check exists to suppress mypy error + continue + key, _, value, _ = match.groups() + + link.add(key, value) + + key = link.get("rel", url) + + link.add("url", self.url.join(URL(url))) + + links.add(str(key), MultiDictProxy(link)) + + return MultiDictProxy(links) + + async def start(self, connection: "Connection") -> "ClientResponse": + """Start response processing.""" + self._closed = False + self._protocol = connection.protocol + self._connection = connection + + with self._timer: + while True: + # read response + try: + protocol = self._protocol + message, payload = await protocol.read() # type: ignore[union-attr] + except http.HttpProcessingError as exc: + raise ClientResponseError( + self.request_info, + self.history, + status=exc.code, + message=exc.message, + headers=exc.headers, + ) from exc + + if message.code < 100 or message.code > 199 or message.code == 101: + break + + if self._continue is not None: + set_result(self._continue, True) + self._continue = None + + # payload eof handler + payload.on_eof(self._response_eof) + + # response status + self.version = message.version + self.status = message.code + self.reason = message.reason + + # headers + self._headers = message.headers # type is CIMultiDictProxy + self._raw_headers = message.raw_headers # type is Tuple[bytes, bytes] + + # payload + self.content = payload + + # cookies + if cookie_hdrs := self.headers.getall(hdrs.SET_COOKIE, ()): + # Store raw cookie headers for CookieJar + self._raw_cookie_headers = tuple(cookie_hdrs) + return self + + def _response_eof(self) -> None: + if self._closed: + return + + # protocol could be None because connection could be detached + protocol = self._connection and self._connection.protocol + if protocol is not None and protocol.upgraded: + return + + self._closed = True + self._cleanup_writer() + self._release_connection() + + @property + def closed(self) -> bool: + return self._closed + + def close(self) -> None: + if not self._released: + self._notify_content() + + self._closed = True + if self._loop is None or self._loop.is_closed(): + return + + self._cleanup_writer() + if self._connection is not None: + self._connection.close() + self._connection = None + + def release(self) -> Any: + if not self._released: + self._notify_content() + + self._closed = True + + self._cleanup_writer() + self._release_connection() + return noop() + + @property + def ok(self) -> bool: + """Returns ``True`` if ``status`` is less than ``400``, ``False`` if not. + + This is **not** a check for ``200 OK`` but a check that the response + status is under 400. + """ + return 400 > self.status + + def raise_for_status(self) -> None: + if not self.ok: + # reason should always be not None for a started response + assert self.reason is not None + + # If we're in a context we can rely on __aexit__() to release as the + # exception propagates. + if not self._in_context: + self.release() + + raise ClientResponseError( + self.request_info, + self.history, + status=self.status, + message=self.reason, + headers=self.headers, + ) + + def _release_connection(self) -> None: + if self._connection is not None: + if self.__writer is None: + self._connection.release() + self._connection = None + else: + self.__writer.add_done_callback(lambda f: self._release_connection()) + + async def _wait_released(self) -> None: + if self.__writer is not None: + try: + await self.__writer + except asyncio.CancelledError: + if ( + sys.version_info >= (3, 11) + and (task := asyncio.current_task()) + and task.cancelling() + ): + raise + self._release_connection() + + def _cleanup_writer(self) -> None: + if self.__writer is not None: + self.__writer.cancel() + self._session = None + + def _notify_content(self) -> None: + content = self.content + if content and content.exception() is None: + set_exception(content, _CONNECTION_CLOSED_EXCEPTION) + self._released = True + + async def wait_for_close(self) -> None: + if self.__writer is not None: + try: + await self.__writer + except asyncio.CancelledError: + if ( + sys.version_info >= (3, 11) + and (task := asyncio.current_task()) + and task.cancelling() + ): + raise + self.release() + + async def read(self) -> bytes: + """Read response payload.""" + if self._body is None: + try: + self._body = await self.content.read() + for trace in self._traces: + await trace.send_response_chunk_received( + self.method, self.url, self._body + ) + except BaseException: + self.close() + raise + elif self._released: # Response explicitly released + raise ClientConnectionError("Connection closed") + + protocol = self._connection and self._connection.protocol + if protocol is None or not protocol.upgraded: + await self._wait_released() # Underlying connection released + return self._body + + def get_encoding(self) -> str: + ctype = self.headers.get(hdrs.CONTENT_TYPE, "").lower() + mimetype = helpers.parse_mimetype(ctype) + + encoding = mimetype.parameters.get("charset") + if encoding: + with contextlib.suppress(LookupError, ValueError): + return codecs.lookup(encoding).name + + if mimetype.type == "application" and ( + mimetype.subtype == "json" or mimetype.subtype == "rdap" + ): + # RFC 7159 states that the default encoding is UTF-8. + # RFC 7483 defines application/rdap+json + return "utf-8" + + if self._body is None: + raise RuntimeError( + "Cannot compute fallback encoding of a not yet read body" + ) + + return self._resolve_charset(self, self._body) + + async def text(self, encoding: Optional[str] = None, errors: str = "strict") -> str: + """Read response payload and decode.""" + if self._body is None: + await self.read() + + if encoding is None: + encoding = self.get_encoding() + + return self._body.decode(encoding, errors=errors) # type: ignore[union-attr] + + async def json( + self, + *, + encoding: Optional[str] = None, + loads: JSONDecoder = DEFAULT_JSON_DECODER, + content_type: Optional[str] = "application/json", + ) -> Any: + """Read and decodes JSON response.""" + if self._body is None: + await self.read() + + if content_type: + ctype = self.headers.get(hdrs.CONTENT_TYPE, "").lower() + if not _is_expected_content_type(ctype, content_type): + raise ContentTypeError( + self.request_info, + self.history, + status=self.status, + message=( + "Attempt to decode JSON with unexpected mimetype: %s" % ctype + ), + headers=self.headers, + ) + + stripped = self._body.strip() # type: ignore[union-attr] + if not stripped: + return None + + if encoding is None: + encoding = self.get_encoding() + + return loads(stripped.decode(encoding)) + + async def __aenter__(self) -> "ClientResponse": + self._in_context = True + return self + + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + self._in_context = False + # similar to _RequestContextManager, we do not need to check + # for exceptions, response object can close connection + # if state is broken + self.release() + await self.wait_for_close() + + +class ClientRequest: + GET_METHODS = { + hdrs.METH_GET, + hdrs.METH_HEAD, + hdrs.METH_OPTIONS, + hdrs.METH_TRACE, + } + POST_METHODS = {hdrs.METH_PATCH, hdrs.METH_POST, hdrs.METH_PUT} + ALL_METHODS = GET_METHODS.union(POST_METHODS).union({hdrs.METH_DELETE}) + + DEFAULT_HEADERS = { + hdrs.ACCEPT: "*/*", + hdrs.ACCEPT_ENCODING: _gen_default_accept_encoding(), + } + + # Type of body depends on PAYLOAD_REGISTRY, which is dynamic. + _body: Union[None, payload.Payload] = None + auth = None + response = None + + __writer: Optional["asyncio.Task[None]"] = None # async task for streaming data + + # These class defaults help create_autospec() work correctly. + # If autospec is improved in future, maybe these can be removed. + url = URL() + method = "GET" + + _continue = None # waiter future for '100 Continue' response + + _skip_auto_headers: Optional["CIMultiDict[None]"] = None + + # N.B. + # Adding __del__ method with self._writer closing doesn't make sense + # because _writer is instance method, thus it keeps a reference to self. + # Until writer has finished finalizer will not be called. + + def __init__( + self, + method: str, + url: URL, + *, + params: Query = None, + headers: Optional[LooseHeaders] = None, + skip_auto_headers: Optional[Iterable[str]] = None, + data: Any = None, + cookies: Optional[LooseCookies] = None, + auth: Optional[BasicAuth] = None, + version: http.HttpVersion = http.HttpVersion11, + compress: Union[str, bool, None] = None, + chunked: Optional[bool] = None, + expect100: bool = False, + loop: Optional[asyncio.AbstractEventLoop] = None, + response_class: Optional[Type["ClientResponse"]] = None, + proxy: Optional[URL] = None, + proxy_auth: Optional[BasicAuth] = None, + timer: Optional[BaseTimerContext] = None, + session: Optional["ClientSession"] = None, + ssl: Union[SSLContext, bool, Fingerprint] = True, + proxy_headers: Optional[LooseHeaders] = None, + traces: Optional[List["Trace"]] = None, + trust_env: bool = False, + server_hostname: Optional[str] = None, + ): + if loop is None: + loop = asyncio.get_event_loop() + if match := _CONTAINS_CONTROL_CHAR_RE.search(method): + raise ValueError( + f"Method cannot contain non-token characters {method!r} " + f"(found at least {match.group()!r})" + ) + # URL forbids subclasses, so a simple type check is enough. + assert type(url) is URL, url + if proxy is not None: + assert type(proxy) is URL, proxy + # FIXME: session is None in tests only, need to fix tests + # assert session is not None + if TYPE_CHECKING: + assert session is not None + self._session = session + if params: + url = url.extend_query(params) + self.original_url = url + self.url = url.with_fragment(None) if url.raw_fragment else url + self.method = method.upper() + self.chunked = chunked + self.compress = compress + self.loop = loop + self.length = None + if response_class is None: + real_response_class = ClientResponse + else: + real_response_class = response_class + self.response_class: Type[ClientResponse] = real_response_class + self._timer = timer if timer is not None else TimerNoop() + self._ssl = ssl if ssl is not None else True + self.server_hostname = server_hostname + + if loop.get_debug(): + self._source_traceback = traceback.extract_stack(sys._getframe(1)) + + self.update_version(version) + self.update_host(url) + self.update_headers(headers) + self.update_auto_headers(skip_auto_headers) + self.update_cookies(cookies) + self.update_content_encoding(data) + self.update_auth(auth, trust_env) + self.update_proxy(proxy, proxy_auth, proxy_headers) + + self.update_body_from_data(data) + if data is not None or self.method not in self.GET_METHODS: + self.update_transfer_encoding() + self.update_expect_continue(expect100) + self._traces = [] if traces is None else traces + + def __reset_writer(self, _: object = None) -> None: + self.__writer = None + + def _get_content_length(self) -> Optional[int]: + """Extract and validate Content-Length header value. + + Returns parsed Content-Length value or None if not set. + Raises ValueError if header exists but cannot be parsed as an integer. + """ + if hdrs.CONTENT_LENGTH not in self.headers: + return None + + content_length_hdr = self.headers[hdrs.CONTENT_LENGTH] + try: + return int(content_length_hdr) + except ValueError: + raise ValueError( + f"Invalid Content-Length header: {content_length_hdr}" + ) from None + + @property + def skip_auto_headers(self) -> CIMultiDict[None]: + return self._skip_auto_headers or CIMultiDict() + + @property + def _writer(self) -> Optional["asyncio.Task[None]"]: + return self.__writer + + @_writer.setter + def _writer(self, writer: "asyncio.Task[None]") -> None: + if self.__writer is not None: + self.__writer.remove_done_callback(self.__reset_writer) + self.__writer = writer + writer.add_done_callback(self.__reset_writer) + + def is_ssl(self) -> bool: + return self.url.scheme in _SSL_SCHEMES + + @property + def ssl(self) -> Union["SSLContext", bool, Fingerprint]: + return self._ssl + + @property + def connection_key(self) -> ConnectionKey: + if proxy_headers := self.proxy_headers: + h: Optional[int] = hash(tuple(proxy_headers.items())) + else: + h = None + url = self.url + return tuple.__new__( + ConnectionKey, + ( + url.raw_host or "", + url.port, + url.scheme in _SSL_SCHEMES, + self._ssl, + self.proxy, + self.proxy_auth, + h, + ), + ) + + @property + def host(self) -> str: + ret = self.url.raw_host + assert ret is not None + return ret + + @property + def port(self) -> Optional[int]: + return self.url.port + + @property + def body(self) -> Union[payload.Payload, Literal[b""]]: + """Request body.""" + # empty body is represented as bytes for backwards compatibility + return self._body or b"" + + @body.setter + def body(self, value: Any) -> None: + """Set request body with warning for non-autoclose payloads. + + WARNING: This setter must be called from within an event loop and is not + thread-safe. Setting body outside of an event loop may raise RuntimeError + when closing file-based payloads. + + DEPRECATED: Direct assignment to body is deprecated and will be removed + in a future version. Use await update_body() instead for proper resource + management. + """ + # Close existing payload if present + if self._body is not None: + # Warn if the payload needs manual closing + # stacklevel=3: user code -> body setter -> _warn_if_unclosed_payload + _warn_if_unclosed_payload(self._body, stacklevel=3) + # NOTE: In the future, when we remove sync close support, + # this setter will need to be removed and only the async + # update_body() method will be available. For now, we call + # _close() for backwards compatibility. + self._body._close() + self._update_body(value) + + @property + def request_info(self) -> RequestInfo: + headers: CIMultiDictProxy[str] = CIMultiDictProxy(self.headers) + # These are created on every request, so we use a NamedTuple + # for performance reasons. We don't use the RequestInfo.__new__ + # method because it has a different signature which is provided + # for backwards compatibility only. + return tuple.__new__( + RequestInfo, (self.url, self.method, headers, self.original_url) + ) + + @property + def session(self) -> "ClientSession": + """Return the ClientSession instance. + + This property provides access to the ClientSession that initiated + this request, allowing middleware to make additional requests + using the same session. + """ + return self._session + + def update_host(self, url: URL) -> None: + """Update destination host, port and connection type (ssl).""" + # get host/port + if not url.raw_host: + raise InvalidURL(url) + + # basic auth info + if url.raw_user or url.raw_password: + self.auth = helpers.BasicAuth(url.user or "", url.password or "") + + def update_version(self, version: Union[http.HttpVersion, str]) -> None: + """Convert request version to two elements tuple. + + parser HTTP version '1.1' => (1, 1) + """ + if isinstance(version, str): + v = [part.strip() for part in version.split(".", 1)] + try: + version = http.HttpVersion(int(v[0]), int(v[1])) + except ValueError: + raise ValueError( + f"Can not parse http version number: {version}" + ) from None + self.version = version + + def update_headers(self, headers: Optional[LooseHeaders]) -> None: + """Update request headers.""" + self.headers: CIMultiDict[str] = CIMultiDict() + + # Build the host header + host = self.url.host_port_subcomponent + + # host_port_subcomponent is None when the URL is a relative URL. + # but we know we do not have a relative URL here. + assert host is not None + self.headers[hdrs.HOST] = host + + if not headers: + return + + if isinstance(headers, (dict, MultiDictProxy, MultiDict)): + headers = headers.items() + + for key, value in headers: # type: ignore[misc] + # A special case for Host header + if key in hdrs.HOST_ALL: + self.headers[key] = value + else: + self.headers.add(key, value) + + def update_auto_headers(self, skip_auto_headers: Optional[Iterable[str]]) -> None: + if skip_auto_headers is not None: + self._skip_auto_headers = CIMultiDict( + (hdr, None) for hdr in sorted(skip_auto_headers) + ) + used_headers = self.headers.copy() + used_headers.extend(self._skip_auto_headers) # type: ignore[arg-type] + else: + # Fast path when there are no headers to skip + # which is the most common case. + used_headers = self.headers + + for hdr, val in self.DEFAULT_HEADERS.items(): + if hdr not in used_headers: + self.headers[hdr] = val + + if hdrs.USER_AGENT not in used_headers: + self.headers[hdrs.USER_AGENT] = SERVER_SOFTWARE + + def update_cookies(self, cookies: Optional[LooseCookies]) -> None: + """Update request cookies header.""" + if not cookies: + return + + c = SimpleCookie() + if hdrs.COOKIE in self.headers: + # parse_cookie_header for RFC 6265 compliant Cookie header parsing + c.update(parse_cookie_header(self.headers.get(hdrs.COOKIE, ""))) + del self.headers[hdrs.COOKIE] + + if isinstance(cookies, Mapping): + iter_cookies = cookies.items() + else: + iter_cookies = cookies # type: ignore[assignment] + for name, value in iter_cookies: + if isinstance(value, Morsel): + # Use helper to preserve coded_value exactly as sent by server + c[name] = preserve_morsel_with_coded_value(value) + else: + c[name] = value # type: ignore[assignment] + + self.headers[hdrs.COOKIE] = c.output(header="", sep=";").strip() + + def update_content_encoding(self, data: Any) -> None: + """Set request content encoding.""" + if not data: + # Don't compress an empty body. + self.compress = None + return + + if self.headers.get(hdrs.CONTENT_ENCODING): + if self.compress: + raise ValueError( + "compress can not be set if Content-Encoding header is set" + ) + elif self.compress: + if not isinstance(self.compress, str): + self.compress = "deflate" + self.headers[hdrs.CONTENT_ENCODING] = self.compress + self.chunked = True # enable chunked, no need to deal with length + + def update_transfer_encoding(self) -> None: + """Analyze transfer-encoding header.""" + te = self.headers.get(hdrs.TRANSFER_ENCODING, "").lower() + + if "chunked" in te: + if self.chunked: + raise ValueError( + "chunked can not be set " + 'if "Transfer-Encoding: chunked" header is set' + ) + + elif self.chunked: + if hdrs.CONTENT_LENGTH in self.headers: + raise ValueError( + "chunked can not be set if Content-Length header is set" + ) + + self.headers[hdrs.TRANSFER_ENCODING] = "chunked" + + def update_auth(self, auth: Optional[BasicAuth], trust_env: bool = False) -> None: + """Set basic auth.""" + if auth is None: + auth = self.auth + if auth is None: + return + + if not isinstance(auth, helpers.BasicAuth): + raise TypeError("BasicAuth() tuple is required instead") + + self.headers[hdrs.AUTHORIZATION] = auth.encode() + + def update_body_from_data(self, body: Any, _stacklevel: int = 3) -> None: + """Update request body from data.""" + if self._body is not None: + _warn_if_unclosed_payload(self._body, stacklevel=_stacklevel) + + if body is None: + self._body = None + # Set Content-Length to 0 when body is None for methods that expect a body + if ( + self.method not in self.GET_METHODS + and not self.chunked + and hdrs.CONTENT_LENGTH not in self.headers + ): + self.headers[hdrs.CONTENT_LENGTH] = "0" + return + + # FormData + maybe_payload = body() if isinstance(body, FormData) else body + + try: + body_payload = payload.PAYLOAD_REGISTRY.get(maybe_payload, disposition=None) + except payload.LookupError: + body_payload = FormData(maybe_payload)() # type: ignore[arg-type] + + self._body = body_payload + # enable chunked encoding if needed + if not self.chunked and hdrs.CONTENT_LENGTH not in self.headers: + if (size := body_payload.size) is not None: + self.headers[hdrs.CONTENT_LENGTH] = str(size) + else: + self.chunked = True + + # copy payload headers + assert body_payload.headers + headers = self.headers + skip_headers = self._skip_auto_headers + for key, value in body_payload.headers.items(): + if key in headers or (skip_headers is not None and key in skip_headers): + continue + headers[key] = value + + def _update_body(self, body: Any) -> None: + """Update request body after its already been set.""" + # Remove existing Content-Length header since body is changing + if hdrs.CONTENT_LENGTH in self.headers: + del self.headers[hdrs.CONTENT_LENGTH] + + # Remove existing Transfer-Encoding header to avoid conflicts + if self.chunked and hdrs.TRANSFER_ENCODING in self.headers: + del self.headers[hdrs.TRANSFER_ENCODING] + + # Now update the body using the existing method + # Called from _update_body, add 1 to stacklevel from caller + self.update_body_from_data(body, _stacklevel=4) + + # Update transfer encoding headers if needed (same logic as __init__) + if body is not None or self.method not in self.GET_METHODS: + self.update_transfer_encoding() + + async def update_body(self, body: Any) -> None: + """ + Update request body and close previous payload if needed. + + This method safely updates the request body by first closing any existing + payload to prevent resource leaks, then setting the new body. + + IMPORTANT: Always use this method instead of setting request.body directly. + Direct assignment to request.body will leak resources if the previous body + contains file handles, streams, or other resources that need cleanup. + + Args: + body: The new body content. Can be: + - bytes/bytearray: Raw binary data + - str: Text data (will be encoded using charset from Content-Type) + - FormData: Form data that will be encoded as multipart/form-data + - Payload: A pre-configured payload object + - AsyncIterable: An async iterable of bytes chunks + - File-like object: Will be read and sent as binary data + - None: Clears the body + + Usage: + # CORRECT: Use update_body + await request.update_body(b"new request data") + + # WRONG: Don't set body directly + # request.body = b"new request data" # This will leak resources! + + # Update with form data + form_data = FormData() + form_data.add_field('field', 'value') + await request.update_body(form_data) + + # Clear body + await request.update_body(None) + + Note: + This method is async because it may need to close file handles or + other resources associated with the previous payload. Always await + this method to ensure proper cleanup. + + Warning: + Setting request.body directly is highly discouraged and can lead to: + - Resource leaks (unclosed file handles, streams) + - Memory leaks (unreleased buffers) + - Unexpected behavior with streaming payloads + + It is not recommended to change the payload type in middleware. If the + body was already set (e.g., as bytes), it's best to keep the same type + rather than converting it (e.g., to str) as this may result in unexpected + behavior. + + See Also: + - update_body_from_data: Synchronous body update without cleanup + - body property: Direct body access (STRONGLY DISCOURAGED) + + """ + # Close existing payload if it exists and needs closing + if self._body is not None: + await self._body.close() + self._update_body(body) + + def update_expect_continue(self, expect: bool = False) -> None: + if expect: + self.headers[hdrs.EXPECT] = "100-continue" + elif ( + hdrs.EXPECT in self.headers + and self.headers[hdrs.EXPECT].lower() == "100-continue" + ): + expect = True + + if expect: + self._continue = self.loop.create_future() + + def update_proxy( + self, + proxy: Optional[URL], + proxy_auth: Optional[BasicAuth], + proxy_headers: Optional[LooseHeaders], + ) -> None: + self.proxy = proxy + if proxy is None: + self.proxy_auth = None + self.proxy_headers = None + return + + if proxy_auth and not isinstance(proxy_auth, helpers.BasicAuth): + raise ValueError("proxy_auth must be None or BasicAuth() tuple") + self.proxy_auth = proxy_auth + + if proxy_headers is not None and not isinstance( + proxy_headers, (MultiDict, MultiDictProxy) + ): + proxy_headers = CIMultiDict(proxy_headers) + self.proxy_headers = proxy_headers + + async def write_bytes( + self, + writer: AbstractStreamWriter, + conn: "Connection", + content_length: Optional[int] = None, + ) -> None: + """ + Write the request body to the connection stream. + + This method handles writing different types of request bodies: + 1. Payload objects (using their specialized write_with_length method) + 2. Bytes/bytearray objects + 3. Iterable body content + + Args: + writer: The stream writer to write the body to + conn: The connection being used for this request + content_length: Optional maximum number of bytes to write from the body + (None means write the entire body) + + The method properly handles: + - Waiting for 100-Continue responses if required + - Content length constraints for chunked encoding + - Error handling for network issues, cancellation, and other exceptions + - Signaling EOF and timeout management + + Raises: + ClientOSError: When there's an OS-level error writing the body + ClientConnectionError: When there's a general connection error + asyncio.CancelledError: When the operation is cancelled + + """ + # 100 response + if self._continue is not None: + # Force headers to be sent before waiting for 100-continue + writer.send_headers() + await writer.drain() + await self._continue + + protocol = conn.protocol + assert protocol is not None + try: + # This should be a rare case but the + # self._body can be set to None while + # the task is being started or we wait above + # for the 100-continue response. + # The more likely case is we have an empty + # payload, but 100-continue is still expected. + if self._body is not None: + await self._body.write_with_length(writer, content_length) + except OSError as underlying_exc: + reraised_exc = underlying_exc + + # Distinguish between timeout and other OS errors for better error reporting + exc_is_not_timeout = underlying_exc.errno is not None or not isinstance( + underlying_exc, asyncio.TimeoutError + ) + if exc_is_not_timeout: + reraised_exc = ClientOSError( + underlying_exc.errno, + f"Can not write request body for {self.url !s}", + ) + + set_exception(protocol, reraised_exc, underlying_exc) + except asyncio.CancelledError: + # Body hasn't been fully sent, so connection can't be reused + conn.close() + raise + except Exception as underlying_exc: + set_exception( + protocol, + ClientConnectionError( + "Failed to send bytes into the underlying connection " + f"{conn !s}: {underlying_exc!r}", + ), + underlying_exc, + ) + else: + # Successfully wrote the body, signal EOF and start response timeout + await writer.write_eof() + protocol.start_timeout() + + async def send(self, conn: "Connection") -> "ClientResponse": + # Specify request target: + # - CONNECT request must send authority form URI + # - not CONNECT proxy must send absolute form URI + # - most common is origin form URI + if self.method == hdrs.METH_CONNECT: + connect_host = self.url.host_subcomponent + assert connect_host is not None + path = f"{connect_host}:{self.url.port}" + elif self.proxy and not self.is_ssl(): + path = str(self.url) + else: + path = self.url.raw_path_qs + + protocol = conn.protocol + assert protocol is not None + writer = StreamWriter( + protocol, + self.loop, + on_chunk_sent=( + functools.partial(self._on_chunk_request_sent, self.method, self.url) + if self._traces + else None + ), + on_headers_sent=( + functools.partial(self._on_headers_request_sent, self.method, self.url) + if self._traces + else None + ), + ) + + if self.compress: + writer.enable_compression(self.compress) # type: ignore[arg-type] + + if self.chunked is not None: + writer.enable_chunking() + + # set default content-type + if ( + self.method in self.POST_METHODS + and ( + self._skip_auto_headers is None + or hdrs.CONTENT_TYPE not in self._skip_auto_headers + ) + and hdrs.CONTENT_TYPE not in self.headers + ): + self.headers[hdrs.CONTENT_TYPE] = "application/octet-stream" + + v = self.version + if hdrs.CONNECTION not in self.headers: + if conn._connector.force_close: + if v == HttpVersion11: + self.headers[hdrs.CONNECTION] = "close" + elif v == HttpVersion10: + self.headers[hdrs.CONNECTION] = "keep-alive" + + # status + headers + status_line = f"{self.method} {path} HTTP/{v.major}.{v.minor}" + + # Buffer headers for potential coalescing with body + await writer.write_headers(status_line, self.headers) + + task: Optional["asyncio.Task[None]"] + if self._body or self._continue is not None or protocol.writing_paused: + coro = self.write_bytes(writer, conn, self._get_content_length()) + if sys.version_info >= (3, 12): + # Optimization for Python 3.12, try to write + # bytes immediately to avoid having to schedule + # the task on the event loop. + task = asyncio.Task(coro, loop=self.loop, eager_start=True) + else: + task = self.loop.create_task(coro) + if task.done(): + task = None + else: + self._writer = task + else: + # We have nothing to write because + # - there is no body + # - the protocol does not have writing paused + # - we are not waiting for a 100-continue response + protocol.start_timeout() + writer.set_eof() + task = None + response_class = self.response_class + assert response_class is not None + self.response = response_class( + self.method, + self.original_url, + writer=task, + continue100=self._continue, + timer=self._timer, + request_info=self.request_info, + traces=self._traces, + loop=self.loop, + session=self._session, + ) + return self.response + + async def close(self) -> None: + if self.__writer is not None: + try: + await self.__writer + except asyncio.CancelledError: + if ( + sys.version_info >= (3, 11) + and (task := asyncio.current_task()) + and task.cancelling() + ): + raise + + def terminate(self) -> None: + if self.__writer is not None: + if not self.loop.is_closed(): + self.__writer.cancel() + self.__writer.remove_done_callback(self.__reset_writer) + self.__writer = None + + async def _on_chunk_request_sent(self, method: str, url: URL, chunk: bytes) -> None: + for trace in self._traces: + await trace.send_request_chunk_sent(method, url, chunk) + + async def _on_headers_request_sent( + self, method: str, url: URL, headers: "CIMultiDict[str]" + ) -> None: + for trace in self._traces: + await trace.send_request_headers(method, url, headers) diff --git a/aiohttp/connector.py b/aiohttp/connector.py new file mode 100644 index 0000000000000000000000000000000000000000..290a42400f9af5257e1ca1511c3db7bd0570e9bd --- /dev/null +++ b/aiohttp/connector.py @@ -0,0 +1,1842 @@ +import asyncio +import functools +import random +import socket +import sys +import traceback +import warnings +from collections import OrderedDict, defaultdict, deque +from contextlib import suppress +from http import HTTPStatus +from itertools import chain, cycle, islice +from time import monotonic +from types import TracebackType +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + DefaultDict, + Deque, + Dict, + Iterator, + List, + Literal, + Optional, + Sequence, + Set, + Tuple, + Type, + Union, + cast, +) + +import aiohappyeyeballs +from aiohappyeyeballs import AddrInfoType, SocketFactoryType + +from . import hdrs, helpers +from .abc import AbstractResolver, ResolveResult +from .client_exceptions import ( + ClientConnectionError, + ClientConnectorCertificateError, + ClientConnectorDNSError, + ClientConnectorError, + ClientConnectorSSLError, + ClientHttpProxyError, + ClientProxyConnectionError, + ServerFingerprintMismatch, + UnixClientConnectorError, + cert_errors, + ssl_errors, +) +from .client_proto import ResponseHandler +from .client_reqrep import ClientRequest, Fingerprint, _merge_ssl_params +from .helpers import ( + _SENTINEL, + ceil_timeout, + is_ip_address, + noop, + sentinel, + set_exception, + set_result, +) +from .log import client_logger +from .resolver import DefaultResolver + +if sys.version_info >= (3, 12): + from collections.abc import Buffer +else: + Buffer = Union[bytes, bytearray, "memoryview[int]", "memoryview[bytes]"] + +if TYPE_CHECKING: + import ssl + + SSLContext = ssl.SSLContext +else: + try: + import ssl + + SSLContext = ssl.SSLContext + except ImportError: # pragma: no cover + ssl = None # type: ignore[assignment] + SSLContext = object # type: ignore[misc,assignment] + +EMPTY_SCHEMA_SET = frozenset({""}) +HTTP_SCHEMA_SET = frozenset({"http", "https"}) +WS_SCHEMA_SET = frozenset({"ws", "wss"}) + +HTTP_AND_EMPTY_SCHEMA_SET = HTTP_SCHEMA_SET | EMPTY_SCHEMA_SET +HIGH_LEVEL_SCHEMA_SET = HTTP_AND_EMPTY_SCHEMA_SET | WS_SCHEMA_SET + +NEEDS_CLEANUP_CLOSED = (3, 13, 0) <= sys.version_info < ( + 3, + 13, + 1, +) or sys.version_info < (3, 12, 7) +# Cleanup closed is no longer needed after https://github.com/python/cpython/pull/118960 +# which first appeared in Python 3.12.7 and 3.13.1 + + +__all__ = ( + "BaseConnector", + "TCPConnector", + "UnixConnector", + "NamedPipeConnector", + "AddrInfoType", + "SocketFactoryType", +) + + +if TYPE_CHECKING: + from .client import ClientTimeout + from .client_reqrep import ConnectionKey + from .tracing import Trace + + +class _DeprecationWaiter: + __slots__ = ("_awaitable", "_awaited") + + def __init__(self, awaitable: Awaitable[Any]) -> None: + self._awaitable = awaitable + self._awaited = False + + def __await__(self) -> Any: + self._awaited = True + return self._awaitable.__await__() + + def __del__(self) -> None: + if not self._awaited: + warnings.warn( + "Connector.close() is a coroutine, " + "please use await connector.close()", + DeprecationWarning, + ) + + +async def _wait_for_close(waiters: List[Awaitable[object]]) -> None: + """Wait for all waiters to finish closing.""" + results = await asyncio.gather(*waiters, return_exceptions=True) + for res in results: + if isinstance(res, Exception): + client_logger.debug("Error while closing connector: %r", res) + + +class Connection: + + _source_traceback = None + + def __init__( + self, + connector: "BaseConnector", + key: "ConnectionKey", + protocol: ResponseHandler, + loop: asyncio.AbstractEventLoop, + ) -> None: + self._key = key + self._connector = connector + self._loop = loop + self._protocol: Optional[ResponseHandler] = protocol + self._callbacks: List[Callable[[], None]] = [] + + if loop.get_debug(): + self._source_traceback = traceback.extract_stack(sys._getframe(1)) + + def __repr__(self) -> str: + return f"Connection<{self._key}>" + + def __del__(self, _warnings: Any = warnings) -> None: + if self._protocol is not None: + kwargs = {"source": self} + _warnings.warn(f"Unclosed connection {self!r}", ResourceWarning, **kwargs) + if self._loop.is_closed(): + return + + self._connector._release(self._key, self._protocol, should_close=True) + + context = {"client_connection": self, "message": "Unclosed connection"} + if self._source_traceback is not None: + context["source_traceback"] = self._source_traceback + self._loop.call_exception_handler(context) + + def __bool__(self) -> Literal[True]: + """Force subclasses to not be falsy, to make checks simpler.""" + return True + + @property + def loop(self) -> asyncio.AbstractEventLoop: + warnings.warn( + "connector.loop property is deprecated", DeprecationWarning, stacklevel=2 + ) + return self._loop + + @property + def transport(self) -> Optional[asyncio.Transport]: + if self._protocol is None: + return None + return self._protocol.transport + + @property + def protocol(self) -> Optional[ResponseHandler]: + return self._protocol + + def add_callback(self, callback: Callable[[], None]) -> None: + if callback is not None: + self._callbacks.append(callback) + + def _notify_release(self) -> None: + callbacks, self._callbacks = self._callbacks[:], [] + + for cb in callbacks: + with suppress(Exception): + cb() + + def close(self) -> None: + self._notify_release() + + if self._protocol is not None: + self._connector._release(self._key, self._protocol, should_close=True) + self._protocol = None + + def release(self) -> None: + self._notify_release() + + if self._protocol is not None: + self._connector._release(self._key, self._protocol) + self._protocol = None + + @property + def closed(self) -> bool: + return self._protocol is None or not self._protocol.is_connected() + + +class _ConnectTunnelConnection(Connection): + """Special connection wrapper for CONNECT tunnels that must never be pooled. + + This connection wraps the proxy connection that will be upgraded with TLS. + It must never be released to the pool because: + 1. Its 'closed' future will never complete, causing session.close() to hang + 2. It represents an intermediate state, not a reusable connection + 3. The real connection (with TLS) will be created separately + """ + + def release(self) -> None: + """Do nothing - don't pool or close the connection. + + These connections are an intermediate state during the CONNECT tunnel + setup and will be cleaned up naturally after the TLS upgrade. If they + were to be pooled, they would never be properly closed, causing + session.close() to wait forever for their 'closed' future. + """ + + +class _TransportPlaceholder: + """placeholder for BaseConnector.connect function""" + + __slots__ = ("closed", "transport") + + def __init__(self, closed_future: asyncio.Future[Optional[Exception]]) -> None: + """Initialize a placeholder for a transport.""" + self.closed = closed_future + self.transport = None + + def close(self) -> None: + """Close the placeholder.""" + + def abort(self) -> None: + """Abort the placeholder (does nothing).""" + + +class BaseConnector: + """Base connector class. + + keepalive_timeout - (optional) Keep-alive timeout. + force_close - Set to True to force close and do reconnect + after each request (and between redirects). + limit - The total number of simultaneous connections. + limit_per_host - Number of simultaneous connections to one host. + enable_cleanup_closed - Enables clean-up closed ssl transports. + Disabled by default. + timeout_ceil_threshold - Trigger ceiling of timeout values when + it's above timeout_ceil_threshold. + loop - Optional event loop. + """ + + _closed = True # prevent AttributeError in __del__ if ctor was failed + _source_traceback = None + + # abort transport after 2 seconds (cleanup broken connections) + _cleanup_closed_period = 2.0 + + allowed_protocol_schema_set = HIGH_LEVEL_SCHEMA_SET + + def __init__( + self, + *, + keepalive_timeout: Union[object, None, float] = sentinel, + force_close: bool = False, + limit: int = 100, + limit_per_host: int = 0, + enable_cleanup_closed: bool = False, + loop: Optional[asyncio.AbstractEventLoop] = None, + timeout_ceil_threshold: float = 5, + ) -> None: + + if force_close: + if keepalive_timeout is not None and keepalive_timeout is not sentinel: + raise ValueError( + "keepalive_timeout cannot be set if force_close is True" + ) + else: + if keepalive_timeout is sentinel: + keepalive_timeout = 15.0 + + loop = loop or asyncio.get_running_loop() + self._timeout_ceil_threshold = timeout_ceil_threshold + + self._closed = False + if loop.get_debug(): + self._source_traceback = traceback.extract_stack(sys._getframe(1)) + + # Connection pool of reusable connections. + # We use a deque to store connections because it has O(1) popleft() + # and O(1) append() operations to implement a FIFO queue. + self._conns: DefaultDict[ + ConnectionKey, Deque[Tuple[ResponseHandler, float]] + ] = defaultdict(deque) + self._limit = limit + self._limit_per_host = limit_per_host + self._acquired: Set[ResponseHandler] = set() + self._acquired_per_host: DefaultDict[ConnectionKey, Set[ResponseHandler]] = ( + defaultdict(set) + ) + self._keepalive_timeout = cast(float, keepalive_timeout) + self._force_close = force_close + + # {host_key: FIFO list of waiters} + # The FIFO is implemented with an OrderedDict with None keys because + # python does not have an ordered set. + self._waiters: DefaultDict[ + ConnectionKey, OrderedDict[asyncio.Future[None], None] + ] = defaultdict(OrderedDict) + + self._loop = loop + self._factory = functools.partial(ResponseHandler, loop=loop) + + # start keep-alive connection cleanup task + self._cleanup_handle: Optional[asyncio.TimerHandle] = None + + # start cleanup closed transports task + self._cleanup_closed_handle: Optional[asyncio.TimerHandle] = None + + if enable_cleanup_closed and not NEEDS_CLEANUP_CLOSED: + warnings.warn( + "enable_cleanup_closed ignored because " + "https://github.com/python/cpython/pull/118960 is fixed " + f"in Python version {sys.version_info}", + DeprecationWarning, + stacklevel=2, + ) + enable_cleanup_closed = False + + self._cleanup_closed_disabled = not enable_cleanup_closed + self._cleanup_closed_transports: List[Optional[asyncio.Transport]] = [] + self._placeholder_future: asyncio.Future[Optional[Exception]] = ( + loop.create_future() + ) + self._placeholder_future.set_result(None) + self._cleanup_closed() + + def __del__(self, _warnings: Any = warnings) -> None: + if self._closed: + return + if not self._conns: + return + + conns = [repr(c) for c in self._conns.values()] + + self._close() + + kwargs = {"source": self} + _warnings.warn(f"Unclosed connector {self!r}", ResourceWarning, **kwargs) + context = { + "connector": self, + "connections": conns, + "message": "Unclosed connector", + } + if self._source_traceback is not None: + context["source_traceback"] = self._source_traceback + self._loop.call_exception_handler(context) + + def __enter__(self) -> "BaseConnector": + warnings.warn( + '"with Connector():" is deprecated, ' + 'use "async with Connector():" instead', + DeprecationWarning, + ) + return self + + def __exit__(self, *exc: Any) -> None: + self._close() + + async def __aenter__(self) -> "BaseConnector": + return self + + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]] = None, + exc_value: Optional[BaseException] = None, + exc_traceback: Optional[TracebackType] = None, + ) -> None: + await self.close() + + @property + def force_close(self) -> bool: + """Ultimately close connection on releasing if True.""" + return self._force_close + + @property + def limit(self) -> int: + """The total number for simultaneous connections. + + If limit is 0 the connector has no limit. + The default limit size is 100. + """ + return self._limit + + @property + def limit_per_host(self) -> int: + """The limit for simultaneous connections to the same endpoint. + + Endpoints are the same if they are have equal + (host, port, is_ssl) triple. + """ + return self._limit_per_host + + def _cleanup(self) -> None: + """Cleanup unused transports.""" + if self._cleanup_handle: + self._cleanup_handle.cancel() + # _cleanup_handle should be unset, otherwise _release() will not + # recreate it ever! + self._cleanup_handle = None + + now = monotonic() + timeout = self._keepalive_timeout + + if self._conns: + connections = defaultdict(deque) + deadline = now - timeout + for key, conns in self._conns.items(): + alive: Deque[Tuple[ResponseHandler, float]] = deque() + for proto, use_time in conns: + if proto.is_connected() and use_time - deadline >= 0: + alive.append((proto, use_time)) + continue + transport = proto.transport + proto.close() + if not self._cleanup_closed_disabled and key.is_ssl: + self._cleanup_closed_transports.append(transport) + + if alive: + connections[key] = alive + + self._conns = connections + + if self._conns: + self._cleanup_handle = helpers.weakref_handle( + self, + "_cleanup", + timeout, + self._loop, + timeout_ceil_threshold=self._timeout_ceil_threshold, + ) + + def _cleanup_closed(self) -> None: + """Double confirmation for transport close. + + Some broken ssl servers may leave socket open without proper close. + """ + if self._cleanup_closed_handle: + self._cleanup_closed_handle.cancel() + + for transport in self._cleanup_closed_transports: + if transport is not None: + transport.abort() + + self._cleanup_closed_transports = [] + + if not self._cleanup_closed_disabled: + self._cleanup_closed_handle = helpers.weakref_handle( + self, + "_cleanup_closed", + self._cleanup_closed_period, + self._loop, + timeout_ceil_threshold=self._timeout_ceil_threshold, + ) + + def close(self, *, abort_ssl: bool = False) -> Awaitable[None]: + """Close all opened transports. + + :param abort_ssl: If True, SSL connections will be aborted immediately + without performing the shutdown handshake. This provides + faster cleanup at the cost of less graceful disconnection. + """ + if not (waiters := self._close(abort_ssl=abort_ssl)): + # If there are no connections to close, we can return a noop + # awaitable to avoid scheduling a task on the event loop. + return _DeprecationWaiter(noop()) + coro = _wait_for_close(waiters) + if sys.version_info >= (3, 12): + # Optimization for Python 3.12, try to close connections + # immediately to avoid having to schedule the task on the event loop. + task = asyncio.Task(coro, loop=self._loop, eager_start=True) + else: + task = self._loop.create_task(coro) + return _DeprecationWaiter(task) + + def _close(self, *, abort_ssl: bool = False) -> List[Awaitable[object]]: + waiters: List[Awaitable[object]] = [] + + if self._closed: + return waiters + + self._closed = True + + try: + if self._loop.is_closed(): + return waiters + + # cancel cleanup task + if self._cleanup_handle: + self._cleanup_handle.cancel() + + # cancel cleanup close task + if self._cleanup_closed_handle: + self._cleanup_closed_handle.cancel() + + for data in self._conns.values(): + for proto, _ in data: + if ( + abort_ssl + and proto.transport + and proto.transport.get_extra_info("sslcontext") is not None + ): + proto.abort() + else: + proto.close() + if closed := proto.closed: + waiters.append(closed) + + for proto in self._acquired: + if ( + abort_ssl + and proto.transport + and proto.transport.get_extra_info("sslcontext") is not None + ): + proto.abort() + else: + proto.close() + if closed := proto.closed: + waiters.append(closed) + + for transport in self._cleanup_closed_transports: + if transport is not None: + transport.abort() + + return waiters + + finally: + self._conns.clear() + self._acquired.clear() + for keyed_waiters in self._waiters.values(): + for keyed_waiter in keyed_waiters: + keyed_waiter.cancel() + self._waiters.clear() + self._cleanup_handle = None + self._cleanup_closed_transports.clear() + self._cleanup_closed_handle = None + + @property + def closed(self) -> bool: + """Is connector closed. + + A readonly property. + """ + return self._closed + + def _available_connections(self, key: "ConnectionKey") -> int: + """ + Return number of available connections. + + The limit, limit_per_host and the connection key are taken into account. + + If it returns less than 1 means that there are no connections + available. + """ + # check total available connections + # If there are no limits, this will always return 1 + total_remain = 1 + + if self._limit and (total_remain := self._limit - len(self._acquired)) <= 0: + return total_remain + + # check limit per host + if host_remain := self._limit_per_host: + if acquired := self._acquired_per_host.get(key): + host_remain -= len(acquired) + if total_remain > host_remain: + return host_remain + + return total_remain + + def _update_proxy_auth_header_and_build_proxy_req( + self, req: ClientRequest + ) -> ClientRequest: + """Set Proxy-Authorization header for non-SSL proxy requests and builds the proxy request for SSL proxy requests.""" + url = req.proxy + assert url is not None + headers: Dict[str, str] = {} + if req.proxy_headers is not None: + headers = req.proxy_headers # type: ignore[assignment] + headers[hdrs.HOST] = req.headers[hdrs.HOST] + proxy_req = ClientRequest( + hdrs.METH_GET, + url, + headers=headers, + auth=req.proxy_auth, + loop=self._loop, + ssl=req.ssl, + ) + auth = proxy_req.headers.pop(hdrs.AUTHORIZATION, None) + if auth is not None: + if not req.is_ssl(): + req.headers[hdrs.PROXY_AUTHORIZATION] = auth + else: + proxy_req.headers[hdrs.PROXY_AUTHORIZATION] = auth + return proxy_req + + async def connect( + self, req: ClientRequest, traces: List["Trace"], timeout: "ClientTimeout" + ) -> Connection: + """Get from pool or create new connection.""" + key = req.connection_key + if (conn := await self._get(key, traces)) is not None: + # If we do not have to wait and we can get a connection from the pool + # we can avoid the timeout ceil logic and directly return the connection + if req.proxy: + self._update_proxy_auth_header_and_build_proxy_req(req) + return conn + + async with ceil_timeout(timeout.connect, timeout.ceil_threshold): + if self._available_connections(key) <= 0: + await self._wait_for_available_connection(key, traces) + if (conn := await self._get(key, traces)) is not None: + if req.proxy: + self._update_proxy_auth_header_and_build_proxy_req(req) + return conn + + placeholder = cast( + ResponseHandler, _TransportPlaceholder(self._placeholder_future) + ) + self._acquired.add(placeholder) + if self._limit_per_host: + self._acquired_per_host[key].add(placeholder) + + try: + # Traces are done inside the try block to ensure that the + # that the placeholder is still cleaned up if an exception + # is raised. + if traces: + for trace in traces: + await trace.send_connection_create_start() + proto = await self._create_connection(req, traces, timeout) + if traces: + for trace in traces: + await trace.send_connection_create_end() + except BaseException: + self._release_acquired(key, placeholder) + raise + else: + if self._closed: + proto.close() + raise ClientConnectionError("Connector is closed.") + + # The connection was successfully created, drop the placeholder + # and add the real connection to the acquired set. There should + # be no awaits after the proto is added to the acquired set + # to ensure that the connection is not left in the acquired set + # on cancellation. + self._acquired.remove(placeholder) + self._acquired.add(proto) + if self._limit_per_host: + acquired_per_host = self._acquired_per_host[key] + acquired_per_host.remove(placeholder) + acquired_per_host.add(proto) + return Connection(self, key, proto, self._loop) + + async def _wait_for_available_connection( + self, key: "ConnectionKey", traces: List["Trace"] + ) -> None: + """Wait for an available connection slot.""" + # We loop here because there is a race between + # the connection limit check and the connection + # being acquired. If the connection is acquired + # between the check and the await statement, we + # need to loop again to check if the connection + # slot is still available. + attempts = 0 + while True: + fut: asyncio.Future[None] = self._loop.create_future() + keyed_waiters = self._waiters[key] + keyed_waiters[fut] = None + if attempts: + # If we have waited before, we need to move the waiter + # to the front of the queue as otherwise we might get + # starved and hit the timeout. + keyed_waiters.move_to_end(fut, last=False) + + try: + # Traces happen in the try block to ensure that the + # the waiter is still cleaned up if an exception is raised. + if traces: + for trace in traces: + await trace.send_connection_queued_start() + await fut + if traces: + for trace in traces: + await trace.send_connection_queued_end() + finally: + # pop the waiter from the queue if its still + # there and not already removed by _release_waiter + keyed_waiters.pop(fut, None) + if not self._waiters.get(key, True): + del self._waiters[key] + + if self._available_connections(key) > 0: + break + attempts += 1 + + async def _get( + self, key: "ConnectionKey", traces: List["Trace"] + ) -> Optional[Connection]: + """Get next reusable connection for the key or None. + + The connection will be marked as acquired. + """ + if (conns := self._conns.get(key)) is None: + return None + + t1 = monotonic() + while conns: + proto, t0 = conns.popleft() + # We will we reuse the connection if its connected and + # the keepalive timeout has not been exceeded + if proto.is_connected() and t1 - t0 <= self._keepalive_timeout: + if not conns: + # The very last connection was reclaimed: drop the key + del self._conns[key] + self._acquired.add(proto) + if self._limit_per_host: + self._acquired_per_host[key].add(proto) + if traces: + for trace in traces: + try: + await trace.send_connection_reuseconn() + except BaseException: + self._release_acquired(key, proto) + raise + return Connection(self, key, proto, self._loop) + + # Connection cannot be reused, close it + transport = proto.transport + proto.close() + # only for SSL transports + if not self._cleanup_closed_disabled and key.is_ssl: + self._cleanup_closed_transports.append(transport) + + # No more connections: drop the key + del self._conns[key] + return None + + def _release_waiter(self) -> None: + """ + Iterates over all waiters until one to be released is found. + + The one to be released is not finished and + belongs to a host that has available connections. + """ + if not self._waiters: + return + + # Having the dict keys ordered this avoids to iterate + # at the same order at each call. + queues = list(self._waiters) + random.shuffle(queues) + + for key in queues: + if self._available_connections(key) < 1: + continue + + waiters = self._waiters[key] + while waiters: + waiter, _ = waiters.popitem(last=False) + if not waiter.done(): + waiter.set_result(None) + return + + def _release_acquired(self, key: "ConnectionKey", proto: ResponseHandler) -> None: + """Release acquired connection.""" + if self._closed: + # acquired connection is already released on connector closing + return + + self._acquired.discard(proto) + if self._limit_per_host and (conns := self._acquired_per_host.get(key)): + conns.discard(proto) + if not conns: + del self._acquired_per_host[key] + self._release_waiter() + + def _release( + self, + key: "ConnectionKey", + protocol: ResponseHandler, + *, + should_close: bool = False, + ) -> None: + if self._closed: + # acquired connection is already released on connector closing + return + + self._release_acquired(key, protocol) + + if self._force_close or should_close or protocol.should_close: + transport = protocol.transport + protocol.close() + + if key.is_ssl and not self._cleanup_closed_disabled: + self._cleanup_closed_transports.append(transport) + return + + self._conns[key].append((protocol, monotonic())) + + if self._cleanup_handle is None: + self._cleanup_handle = helpers.weakref_handle( + self, + "_cleanup", + self._keepalive_timeout, + self._loop, + timeout_ceil_threshold=self._timeout_ceil_threshold, + ) + + async def _create_connection( + self, req: ClientRequest, traces: List["Trace"], timeout: "ClientTimeout" + ) -> ResponseHandler: + raise NotImplementedError() + + +class _DNSCacheTable: + def __init__(self, ttl: Optional[float] = None) -> None: + self._addrs_rr: Dict[Tuple[str, int], Tuple[Iterator[ResolveResult], int]] = {} + self._timestamps: Dict[Tuple[str, int], float] = {} + self._ttl = ttl + + def __contains__(self, host: object) -> bool: + return host in self._addrs_rr + + def add(self, key: Tuple[str, int], addrs: List[ResolveResult]) -> None: + self._addrs_rr[key] = (cycle(addrs), len(addrs)) + + if self._ttl is not None: + self._timestamps[key] = monotonic() + + def remove(self, key: Tuple[str, int]) -> None: + self._addrs_rr.pop(key, None) + + if self._ttl is not None: + self._timestamps.pop(key, None) + + def clear(self) -> None: + self._addrs_rr.clear() + self._timestamps.clear() + + def next_addrs(self, key: Tuple[str, int]) -> List[ResolveResult]: + loop, length = self._addrs_rr[key] + addrs = list(islice(loop, length)) + # Consume one more element to shift internal state of `cycle` + next(loop) + return addrs + + def expired(self, key: Tuple[str, int]) -> bool: + if self._ttl is None: + return False + + return self._timestamps[key] + self._ttl < monotonic() + + +def _make_ssl_context(verified: bool) -> SSLContext: + """Create SSL context. + + This method is not async-friendly and should be called from a thread + because it will load certificates from disk and do other blocking I/O. + """ + if ssl is None: + # No ssl support + return None + if verified: + sslcontext = ssl.create_default_context() + else: + sslcontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + sslcontext.options |= ssl.OP_NO_SSLv2 + sslcontext.options |= ssl.OP_NO_SSLv3 + sslcontext.check_hostname = False + sslcontext.verify_mode = ssl.CERT_NONE + sslcontext.options |= ssl.OP_NO_COMPRESSION + sslcontext.set_default_verify_paths() + sslcontext.set_alpn_protocols(("http/1.1",)) + return sslcontext + + +# The default SSLContext objects are created at import time +# since they do blocking I/O to load certificates from disk, +# and imports should always be done before the event loop starts +# or in a thread. +_SSL_CONTEXT_VERIFIED = _make_ssl_context(True) +_SSL_CONTEXT_UNVERIFIED = _make_ssl_context(False) + + +class TCPConnector(BaseConnector): + """TCP connector. + + verify_ssl - Set to True to check ssl certifications. + fingerprint - Pass the binary sha256 + digest of the expected certificate in DER format to verify + that the certificate the server presents matches. See also + https://en.wikipedia.org/wiki/HTTP_Public_Key_Pinning + resolver - Enable DNS lookups and use this + resolver + use_dns_cache - Use memory cache for DNS lookups. + ttl_dns_cache - Max seconds having cached a DNS entry, None forever. + family - socket address family + local_addr - local tuple of (host, port) to bind socket to + + keepalive_timeout - (optional) Keep-alive timeout. + force_close - Set to True to force close and do reconnect + after each request (and between redirects). + limit - The total number of simultaneous connections. + limit_per_host - Number of simultaneous connections to one host. + enable_cleanup_closed - Enables clean-up closed ssl transports. + Disabled by default. + happy_eyeballs_delay - This is the “Connection Attempt Delay” + as defined in RFC 8305. To disable + the happy eyeballs algorithm, set to None. + interleave - “First Address Family Count” as defined in RFC 8305 + loop - Optional event loop. + socket_factory - A SocketFactoryType function that, if supplied, + will be used to create sockets given an + AddrInfoType. + ssl_shutdown_timeout - DEPRECATED. Will be removed in aiohttp 4.0. + Grace period for SSL shutdown handshake on TLS + connections. Default is 0 seconds (immediate abort). + This parameter allowed for a clean SSL shutdown by + notifying the remote peer of connection closure, + while avoiding excessive delays during connector cleanup. + Note: Only takes effect on Python 3.11+. + """ + + allowed_protocol_schema_set = HIGH_LEVEL_SCHEMA_SET | frozenset({"tcp"}) + + def __init__( + self, + *, + verify_ssl: bool = True, + fingerprint: Optional[bytes] = None, + use_dns_cache: bool = True, + ttl_dns_cache: Optional[int] = 10, + family: socket.AddressFamily = socket.AddressFamily.AF_UNSPEC, + ssl_context: Optional[SSLContext] = None, + ssl: Union[bool, Fingerprint, SSLContext] = True, + local_addr: Optional[Tuple[str, int]] = None, + resolver: Optional[AbstractResolver] = None, + keepalive_timeout: Union[None, float, object] = sentinel, + force_close: bool = False, + limit: int = 100, + limit_per_host: int = 0, + enable_cleanup_closed: bool = False, + loop: Optional[asyncio.AbstractEventLoop] = None, + timeout_ceil_threshold: float = 5, + happy_eyeballs_delay: Optional[float] = 0.25, + interleave: Optional[int] = None, + socket_factory: Optional[SocketFactoryType] = None, + ssl_shutdown_timeout: Union[_SENTINEL, None, float] = sentinel, + ): + super().__init__( + keepalive_timeout=keepalive_timeout, + force_close=force_close, + limit=limit, + limit_per_host=limit_per_host, + enable_cleanup_closed=enable_cleanup_closed, + loop=loop, + timeout_ceil_threshold=timeout_ceil_threshold, + ) + + self._ssl = _merge_ssl_params(ssl, verify_ssl, ssl_context, fingerprint) + + self._resolver: AbstractResolver + if resolver is None: + self._resolver = DefaultResolver(loop=self._loop) + self._resolver_owner = True + else: + self._resolver = resolver + self._resolver_owner = False + + self._use_dns_cache = use_dns_cache + self._cached_hosts = _DNSCacheTable(ttl=ttl_dns_cache) + self._throttle_dns_futures: Dict[ + Tuple[str, int], Set["asyncio.Future[None]"] + ] = {} + self._family = family + self._local_addr_infos = aiohappyeyeballs.addr_to_addr_infos(local_addr) + self._happy_eyeballs_delay = happy_eyeballs_delay + self._interleave = interleave + self._resolve_host_tasks: Set["asyncio.Task[List[ResolveResult]]"] = set() + self._socket_factory = socket_factory + self._ssl_shutdown_timeout: Optional[float] + # Handle ssl_shutdown_timeout with warning for Python < 3.11 + if ssl_shutdown_timeout is sentinel: + self._ssl_shutdown_timeout = 0 + else: + # Deprecation warning for ssl_shutdown_timeout parameter + warnings.warn( + "The ssl_shutdown_timeout parameter is deprecated and will be removed in aiohttp 4.0", + DeprecationWarning, + stacklevel=2, + ) + if ( + sys.version_info < (3, 11) + and ssl_shutdown_timeout is not None + and ssl_shutdown_timeout != 0 + ): + warnings.warn( + f"ssl_shutdown_timeout={ssl_shutdown_timeout} is ignored on Python < 3.11; " + "only ssl_shutdown_timeout=0 is supported. The timeout will be ignored.", + RuntimeWarning, + stacklevel=2, + ) + self._ssl_shutdown_timeout = ssl_shutdown_timeout + + def _close(self, *, abort_ssl: bool = False) -> List[Awaitable[object]]: + """Close all ongoing DNS calls.""" + for fut in chain.from_iterable(self._throttle_dns_futures.values()): + fut.cancel() + + waiters = super()._close(abort_ssl=abort_ssl) + + for t in self._resolve_host_tasks: + t.cancel() + waiters.append(t) + + return waiters + + async def close(self, *, abort_ssl: bool = False) -> None: + """ + Close all opened transports. + + :param abort_ssl: If True, SSL connections will be aborted immediately + without performing the shutdown handshake. If False (default), + the behavior is determined by ssl_shutdown_timeout: + - If ssl_shutdown_timeout=0: connections are aborted + - If ssl_shutdown_timeout>0: graceful shutdown is performed + """ + if self._resolver_owner: + await self._resolver.close() + # Use abort_ssl param if explicitly set, otherwise use ssl_shutdown_timeout default + await super().close(abort_ssl=abort_ssl or self._ssl_shutdown_timeout == 0) + + @property + def family(self) -> int: + """Socket family like AF_INET.""" + return self._family + + @property + def use_dns_cache(self) -> bool: + """True if local DNS caching is enabled.""" + return self._use_dns_cache + + def clear_dns_cache( + self, host: Optional[str] = None, port: Optional[int] = None + ) -> None: + """Remove specified host/port or clear all dns local cache.""" + if host is not None and port is not None: + self._cached_hosts.remove((host, port)) + elif host is not None or port is not None: + raise ValueError("either both host and port or none of them are allowed") + else: + self._cached_hosts.clear() + + async def _resolve_host( + self, host: str, port: int, traces: Optional[Sequence["Trace"]] = None + ) -> List[ResolveResult]: + """Resolve host and return list of addresses.""" + if is_ip_address(host): + return [ + { + "hostname": host, + "host": host, + "port": port, + "family": self._family, + "proto": 0, + "flags": 0, + } + ] + + if not self._use_dns_cache: + + if traces: + for trace in traces: + await trace.send_dns_resolvehost_start(host) + + res = await self._resolver.resolve(host, port, family=self._family) + + if traces: + for trace in traces: + await trace.send_dns_resolvehost_end(host) + + return res + + key = (host, port) + if key in self._cached_hosts and not self._cached_hosts.expired(key): + # get result early, before any await (#4014) + result = self._cached_hosts.next_addrs(key) + + if traces: + for trace in traces: + await trace.send_dns_cache_hit(host) + return result + + futures: Set["asyncio.Future[None]"] + # + # If multiple connectors are resolving the same host, we wait + # for the first one to resolve and then use the result for all of them. + # We use a throttle to ensure that we only resolve the host once + # and then use the result for all the waiters. + # + if key in self._throttle_dns_futures: + # get futures early, before any await (#4014) + futures = self._throttle_dns_futures[key] + future: asyncio.Future[None] = self._loop.create_future() + futures.add(future) + if traces: + for trace in traces: + await trace.send_dns_cache_hit(host) + try: + await future + finally: + futures.discard(future) + return self._cached_hosts.next_addrs(key) + + # update dict early, before any await (#4014) + self._throttle_dns_futures[key] = futures = set() + # In this case we need to create a task to ensure that we can shield + # the task from cancellation as cancelling this lookup should not cancel + # the underlying lookup or else the cancel event will get broadcast to + # all the waiters across all connections. + # + coro = self._resolve_host_with_throttle(key, host, port, futures, traces) + loop = asyncio.get_running_loop() + if sys.version_info >= (3, 12): + # Optimization for Python 3.12, try to send immediately + resolved_host_task = asyncio.Task(coro, loop=loop, eager_start=True) + else: + resolved_host_task = loop.create_task(coro) + + if not resolved_host_task.done(): + self._resolve_host_tasks.add(resolved_host_task) + resolved_host_task.add_done_callback(self._resolve_host_tasks.discard) + + try: + return await asyncio.shield(resolved_host_task) + except asyncio.CancelledError: + + def drop_exception(fut: "asyncio.Future[List[ResolveResult]]") -> None: + with suppress(Exception, asyncio.CancelledError): + fut.result() + + resolved_host_task.add_done_callback(drop_exception) + raise + + async def _resolve_host_with_throttle( + self, + key: Tuple[str, int], + host: str, + port: int, + futures: Set["asyncio.Future[None]"], + traces: Optional[Sequence["Trace"]], + ) -> List[ResolveResult]: + """Resolve host and set result for all waiters. + + This method must be run in a task and shielded from cancellation + to avoid cancelling the underlying lookup. + """ + try: + if traces: + for trace in traces: + await trace.send_dns_cache_miss(host) + + for trace in traces: + await trace.send_dns_resolvehost_start(host) + + addrs = await self._resolver.resolve(host, port, family=self._family) + if traces: + for trace in traces: + await trace.send_dns_resolvehost_end(host) + + self._cached_hosts.add(key, addrs) + for fut in futures: + set_result(fut, None) + except BaseException as e: + # any DNS exception is set for the waiters to raise the same exception. + # This coro is always run in task that is shielded from cancellation so + # we should never be propagating cancellation here. + for fut in futures: + set_exception(fut, e) + raise + finally: + self._throttle_dns_futures.pop(key) + + return self._cached_hosts.next_addrs(key) + + async def _create_connection( + self, req: ClientRequest, traces: List["Trace"], timeout: "ClientTimeout" + ) -> ResponseHandler: + """Create connection. + + Has same keyword arguments as BaseEventLoop.create_connection. + """ + if req.proxy: + _, proto = await self._create_proxy_connection(req, traces, timeout) + else: + _, proto = await self._create_direct_connection(req, traces, timeout) + + return proto + + def _get_ssl_context(self, req: ClientRequest) -> Optional[SSLContext]: + """Logic to get the correct SSL context + + 0. if req.ssl is false, return None + + 1. if ssl_context is specified in req, use it + 2. if _ssl_context is specified in self, use it + 3. otherwise: + 1. if verify_ssl is not specified in req, use self.ssl_context + (will generate a default context according to self.verify_ssl) + 2. if verify_ssl is True in req, generate a default SSL context + 3. if verify_ssl is False in req, generate a SSL context that + won't verify + """ + if not req.is_ssl(): + return None + + if ssl is None: # pragma: no cover + raise RuntimeError("SSL is not supported.") + sslcontext = req.ssl + if isinstance(sslcontext, ssl.SSLContext): + return sslcontext + if sslcontext is not True: + # not verified or fingerprinted + return _SSL_CONTEXT_UNVERIFIED + sslcontext = self._ssl + if isinstance(sslcontext, ssl.SSLContext): + return sslcontext + if sslcontext is not True: + # not verified or fingerprinted + return _SSL_CONTEXT_UNVERIFIED + return _SSL_CONTEXT_VERIFIED + + def _get_fingerprint(self, req: ClientRequest) -> Optional["Fingerprint"]: + ret = req.ssl + if isinstance(ret, Fingerprint): + return ret + ret = self._ssl + if isinstance(ret, Fingerprint): + return ret + return None + + async def _wrap_create_connection( + self, + *args: Any, + addr_infos: List[AddrInfoType], + req: ClientRequest, + timeout: "ClientTimeout", + client_error: Type[Exception] = ClientConnectorError, + **kwargs: Any, + ) -> Tuple[asyncio.Transport, ResponseHandler]: + try: + async with ceil_timeout( + timeout.sock_connect, ceil_threshold=timeout.ceil_threshold + ): + sock = await aiohappyeyeballs.start_connection( + addr_infos=addr_infos, + local_addr_infos=self._local_addr_infos, + happy_eyeballs_delay=self._happy_eyeballs_delay, + interleave=self._interleave, + loop=self._loop, + socket_factory=self._socket_factory, + ) + # Add ssl_shutdown_timeout for Python 3.11+ when SSL is used + if ( + kwargs.get("ssl") + and self._ssl_shutdown_timeout + and sys.version_info >= (3, 11) + ): + kwargs["ssl_shutdown_timeout"] = self._ssl_shutdown_timeout + return await self._loop.create_connection(*args, **kwargs, sock=sock) + except cert_errors as exc: + raise ClientConnectorCertificateError(req.connection_key, exc) from exc + except ssl_errors as exc: + raise ClientConnectorSSLError(req.connection_key, exc) from exc + except OSError as exc: + if exc.errno is None and isinstance(exc, asyncio.TimeoutError): + raise + raise client_error(req.connection_key, exc) from exc + + async def _wrap_existing_connection( + self, + *args: Any, + req: ClientRequest, + timeout: "ClientTimeout", + client_error: Type[Exception] = ClientConnectorError, + **kwargs: Any, + ) -> Tuple[asyncio.Transport, ResponseHandler]: + try: + async with ceil_timeout( + timeout.sock_connect, ceil_threshold=timeout.ceil_threshold + ): + return await self._loop.create_connection(*args, **kwargs) + except cert_errors as exc: + raise ClientConnectorCertificateError(req.connection_key, exc) from exc + except ssl_errors as exc: + raise ClientConnectorSSLError(req.connection_key, exc) from exc + except OSError as exc: + if exc.errno is None and isinstance(exc, asyncio.TimeoutError): + raise + raise client_error(req.connection_key, exc) from exc + + def _fail_on_no_start_tls(self, req: "ClientRequest") -> None: + """Raise a :py:exc:`RuntimeError` on missing ``start_tls()``. + + It is necessary for TLS-in-TLS so that it is possible to + send HTTPS queries through HTTPS proxies. + + This doesn't affect regular HTTP requests, though. + """ + if not req.is_ssl(): + return + + proxy_url = req.proxy + assert proxy_url is not None + if proxy_url.scheme != "https": + return + + self._check_loop_for_start_tls() + + def _check_loop_for_start_tls(self) -> None: + try: + self._loop.start_tls + except AttributeError as attr_exc: + raise RuntimeError( + "An HTTPS request is being sent through an HTTPS proxy. " + "This needs support for TLS in TLS but it is not implemented " + "in your runtime for the stdlib asyncio.\n\n" + "Please upgrade to Python 3.11 or higher. For more details, " + "please see:\n" + "* https://bugs.python.org/issue37179\n" + "* https://github.com/python/cpython/pull/28073\n" + "* https://docs.aiohttp.org/en/stable/" + "client_advanced.html#proxy-support\n" + "* https://github.com/aio-libs/aiohttp/discussions/6044\n", + ) from attr_exc + + def _loop_supports_start_tls(self) -> bool: + try: + self._check_loop_for_start_tls() + except RuntimeError: + return False + else: + return True + + def _warn_about_tls_in_tls( + self, + underlying_transport: asyncio.Transport, + req: ClientRequest, + ) -> None: + """Issue a warning if the requested URL has HTTPS scheme.""" + if req.request_info.url.scheme != "https": + return + + # Check if uvloop is being used, which supports TLS in TLS, + # otherwise assume that asyncio's native transport is being used. + if type(underlying_transport).__module__.startswith("uvloop"): + return + + # Support in asyncio was added in Python 3.11 (bpo-44011) + asyncio_supports_tls_in_tls = sys.version_info >= (3, 11) or getattr( + underlying_transport, + "_start_tls_compatible", + False, + ) + + if asyncio_supports_tls_in_tls: + return + + warnings.warn( + "An HTTPS request is being sent through an HTTPS proxy. " + "This support for TLS in TLS is known to be disabled " + "in the stdlib asyncio (Python <3.11). This is why you'll probably see " + "an error in the log below.\n\n" + "It is possible to enable it via monkeypatching. " + "For more details, see:\n" + "* https://bugs.python.org/issue37179\n" + "* https://github.com/python/cpython/pull/28073\n\n" + "You can temporarily patch this as follows:\n" + "* https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support\n" + "* https://github.com/aio-libs/aiohttp/discussions/6044\n", + RuntimeWarning, + source=self, + # Why `4`? At least 3 of the calls in the stack originate + # from the methods in this class. + stacklevel=3, + ) + + async def _start_tls_connection( + self, + underlying_transport: asyncio.Transport, + req: ClientRequest, + timeout: "ClientTimeout", + client_error: Type[Exception] = ClientConnectorError, + ) -> Tuple[asyncio.BaseTransport, ResponseHandler]: + """Wrap the raw TCP transport with TLS.""" + tls_proto = self._factory() # Create a brand new proto for TLS + sslcontext = self._get_ssl_context(req) + if TYPE_CHECKING: + # _start_tls_connection is unreachable in the current code path + # if sslcontext is None. + assert sslcontext is not None + + try: + async with ceil_timeout( + timeout.sock_connect, ceil_threshold=timeout.ceil_threshold + ): + try: + # ssl_shutdown_timeout is only available in Python 3.11+ + if sys.version_info >= (3, 11) and self._ssl_shutdown_timeout: + tls_transport = await self._loop.start_tls( + underlying_transport, + tls_proto, + sslcontext, + server_hostname=req.server_hostname or req.host, + ssl_handshake_timeout=timeout.total, + ssl_shutdown_timeout=self._ssl_shutdown_timeout, + ) + else: + tls_transport = await self._loop.start_tls( + underlying_transport, + tls_proto, + sslcontext, + server_hostname=req.server_hostname or req.host, + ssl_handshake_timeout=timeout.total, + ) + except BaseException: + # We need to close the underlying transport since + # `start_tls()` probably failed before it had a + # chance to do this: + if self._ssl_shutdown_timeout == 0: + underlying_transport.abort() + else: + underlying_transport.close() + raise + if isinstance(tls_transport, asyncio.Transport): + fingerprint = self._get_fingerprint(req) + if fingerprint: + try: + fingerprint.check(tls_transport) + except ServerFingerprintMismatch: + tls_transport.close() + if not self._cleanup_closed_disabled: + self._cleanup_closed_transports.append(tls_transport) + raise + except cert_errors as exc: + raise ClientConnectorCertificateError(req.connection_key, exc) from exc + except ssl_errors as exc: + raise ClientConnectorSSLError(req.connection_key, exc) from exc + except OSError as exc: + if exc.errno is None and isinstance(exc, asyncio.TimeoutError): + raise + raise client_error(req.connection_key, exc) from exc + except TypeError as type_err: + # Example cause looks like this: + # TypeError: transport is not supported by start_tls() + + raise ClientConnectionError( + "Cannot initialize a TLS-in-TLS connection to host " + f"{req.host!s}:{req.port:d} through an underlying connection " + f"to an HTTPS proxy {req.proxy!s} ssl:{req.ssl or 'default'} " + f"[{type_err!s}]" + ) from type_err + else: + if tls_transport is None: + msg = "Failed to start TLS (possibly caused by closing transport)" + raise client_error(req.connection_key, OSError(msg)) + tls_proto.connection_made( + tls_transport + ) # Kick the state machine of the new TLS protocol + + return tls_transport, tls_proto + + def _convert_hosts_to_addr_infos( + self, hosts: List[ResolveResult] + ) -> List[AddrInfoType]: + """Converts the list of hosts to a list of addr_infos. + + The list of hosts is the result of a DNS lookup. The list of + addr_infos is the result of a call to `socket.getaddrinfo()`. + """ + addr_infos: List[AddrInfoType] = [] + for hinfo in hosts: + host = hinfo["host"] + is_ipv6 = ":" in host + family = socket.AF_INET6 if is_ipv6 else socket.AF_INET + if self._family and self._family != family: + continue + addr = (host, hinfo["port"], 0, 0) if is_ipv6 else (host, hinfo["port"]) + addr_infos.append( + (family, socket.SOCK_STREAM, socket.IPPROTO_TCP, "", addr) + ) + return addr_infos + + async def _create_direct_connection( + self, + req: ClientRequest, + traces: List["Trace"], + timeout: "ClientTimeout", + *, + client_error: Type[Exception] = ClientConnectorError, + ) -> Tuple[asyncio.Transport, ResponseHandler]: + sslcontext = self._get_ssl_context(req) + fingerprint = self._get_fingerprint(req) + + host = req.url.raw_host + assert host is not None + # Replace multiple trailing dots with a single one. + # A trailing dot is only present for fully-qualified domain names. + # See https://github.com/aio-libs/aiohttp/pull/7364. + if host.endswith(".."): + host = host.rstrip(".") + "." + port = req.port + assert port is not None + try: + # Cancelling this lookup should not cancel the underlying lookup + # or else the cancel event will get broadcast to all the waiters + # across all connections. + hosts = await self._resolve_host(host, port, traces=traces) + except OSError as exc: + if exc.errno is None and isinstance(exc, asyncio.TimeoutError): + raise + # in case of proxy it is not ClientProxyConnectionError + # it is problem of resolving proxy ip itself + raise ClientConnectorDNSError(req.connection_key, exc) from exc + + last_exc: Optional[Exception] = None + addr_infos = self._convert_hosts_to_addr_infos(hosts) + while addr_infos: + # Strip trailing dots, certificates contain FQDN without dots. + # See https://github.com/aio-libs/aiohttp/issues/3636 + server_hostname = ( + (req.server_hostname or host).rstrip(".") if sslcontext else None + ) + + try: + transp, proto = await self._wrap_create_connection( + self._factory, + timeout=timeout, + ssl=sslcontext, + addr_infos=addr_infos, + server_hostname=server_hostname, + req=req, + client_error=client_error, + ) + except (ClientConnectorError, asyncio.TimeoutError) as exc: + last_exc = exc + aiohappyeyeballs.pop_addr_infos_interleave(addr_infos, self._interleave) + continue + + if req.is_ssl() and fingerprint: + try: + fingerprint.check(transp) + except ServerFingerprintMismatch as exc: + transp.close() + if not self._cleanup_closed_disabled: + self._cleanup_closed_transports.append(transp) + last_exc = exc + # Remove the bad peer from the list of addr_infos + sock: socket.socket = transp.get_extra_info("socket") + bad_peer = sock.getpeername() + aiohappyeyeballs.remove_addr_infos(addr_infos, bad_peer) + continue + + return transp, proto + else: + assert last_exc is not None + raise last_exc + + async def _create_proxy_connection( + self, req: ClientRequest, traces: List["Trace"], timeout: "ClientTimeout" + ) -> Tuple[asyncio.BaseTransport, ResponseHandler]: + self._fail_on_no_start_tls(req) + runtime_has_start_tls = self._loop_supports_start_tls() + proxy_req = self._update_proxy_auth_header_and_build_proxy_req(req) + + # create connection to proxy server + transport, proto = await self._create_direct_connection( + proxy_req, [], timeout, client_error=ClientProxyConnectionError + ) + + if req.is_ssl(): + if runtime_has_start_tls: + self._warn_about_tls_in_tls(transport, req) + + # For HTTPS requests over HTTP proxy + # we must notify proxy to tunnel connection + # so we send CONNECT command: + # CONNECT www.python.org:443 HTTP/1.1 + # Host: www.python.org + # + # next we must do TLS handshake and so on + # to do this we must wrap raw socket into secure one + # asyncio handles this perfectly + proxy_req.method = hdrs.METH_CONNECT + proxy_req.url = req.url + key = req.connection_key._replace( + proxy=None, proxy_auth=None, proxy_headers_hash=None + ) + conn = _ConnectTunnelConnection(self, key, proto, self._loop) + proxy_resp = await proxy_req.send(conn) + try: + protocol = conn._protocol + assert protocol is not None + + # read_until_eof=True will ensure the connection isn't closed + # once the response is received and processed allowing + # START_TLS to work on the connection below. + protocol.set_response_params( + read_until_eof=runtime_has_start_tls, + timeout_ceil_threshold=self._timeout_ceil_threshold, + ) + resp = await proxy_resp.start(conn) + except BaseException: + proxy_resp.close() + conn.close() + raise + else: + conn._protocol = None + try: + if resp.status != 200: + message = resp.reason + if message is None: + message = HTTPStatus(resp.status).phrase + raise ClientHttpProxyError( + proxy_resp.request_info, + resp.history, + status=resp.status, + message=message, + headers=resp.headers, + ) + if not runtime_has_start_tls: + rawsock = transport.get_extra_info("socket", default=None) + if rawsock is None: + raise RuntimeError( + "Transport does not expose socket instance" + ) + # Duplicate the socket, so now we can close proxy transport + rawsock = rawsock.dup() + except BaseException: + # It shouldn't be closed in `finally` because it's fed to + # `loop.start_tls()` and the docs say not to touch it after + # passing there. + transport.close() + raise + finally: + if not runtime_has_start_tls: + transport.close() + + if not runtime_has_start_tls: + # HTTP proxy with support for upgrade to HTTPS + sslcontext = self._get_ssl_context(req) + return await self._wrap_existing_connection( + self._factory, + timeout=timeout, + ssl=sslcontext, + sock=rawsock, + server_hostname=req.host, + req=req, + ) + + return await self._start_tls_connection( + # Access the old transport for the last time before it's + # closed and forgotten forever: + transport, + req=req, + timeout=timeout, + ) + finally: + proxy_resp.close() + + return transport, proto + + +class UnixConnector(BaseConnector): + """Unix socket connector. + + path - Unix socket path. + keepalive_timeout - (optional) Keep-alive timeout. + force_close - Set to True to force close and do reconnect + after each request (and between redirects). + limit - The total number of simultaneous connections. + limit_per_host - Number of simultaneous connections to one host. + loop - Optional event loop. + """ + + allowed_protocol_schema_set = HIGH_LEVEL_SCHEMA_SET | frozenset({"unix"}) + + def __init__( + self, + path: str, + force_close: bool = False, + keepalive_timeout: Union[object, float, None] = sentinel, + limit: int = 100, + limit_per_host: int = 0, + loop: Optional[asyncio.AbstractEventLoop] = None, + ) -> None: + super().__init__( + force_close=force_close, + keepalive_timeout=keepalive_timeout, + limit=limit, + limit_per_host=limit_per_host, + loop=loop, + ) + self._path = path + + @property + def path(self) -> str: + """Path to unix socket.""" + return self._path + + async def _create_connection( + self, req: ClientRequest, traces: List["Trace"], timeout: "ClientTimeout" + ) -> ResponseHandler: + try: + async with ceil_timeout( + timeout.sock_connect, ceil_threshold=timeout.ceil_threshold + ): + _, proto = await self._loop.create_unix_connection( + self._factory, self._path + ) + except OSError as exc: + if exc.errno is None and isinstance(exc, asyncio.TimeoutError): + raise + raise UnixClientConnectorError(self.path, req.connection_key, exc) from exc + + return proto + + +class NamedPipeConnector(BaseConnector): + """Named pipe connector. + + Only supported by the proactor event loop. + See also: https://docs.python.org/3/library/asyncio-eventloop.html + + path - Windows named pipe path. + keepalive_timeout - (optional) Keep-alive timeout. + force_close - Set to True to force close and do reconnect + after each request (and between redirects). + limit - The total number of simultaneous connections. + limit_per_host - Number of simultaneous connections to one host. + loop - Optional event loop. + """ + + allowed_protocol_schema_set = HIGH_LEVEL_SCHEMA_SET | frozenset({"npipe"}) + + def __init__( + self, + path: str, + force_close: bool = False, + keepalive_timeout: Union[object, float, None] = sentinel, + limit: int = 100, + limit_per_host: int = 0, + loop: Optional[asyncio.AbstractEventLoop] = None, + ) -> None: + super().__init__( + force_close=force_close, + keepalive_timeout=keepalive_timeout, + limit=limit, + limit_per_host=limit_per_host, + loop=loop, + ) + if not isinstance( + self._loop, + asyncio.ProactorEventLoop, # type: ignore[attr-defined] + ): + raise RuntimeError( + "Named Pipes only available in proactor loop under windows" + ) + self._path = path + + @property + def path(self) -> str: + """Path to the named pipe.""" + return self._path + + async def _create_connection( + self, req: ClientRequest, traces: List["Trace"], timeout: "ClientTimeout" + ) -> ResponseHandler: + try: + async with ceil_timeout( + timeout.sock_connect, ceil_threshold=timeout.ceil_threshold + ): + _, proto = await self._loop.create_pipe_connection( # type: ignore[attr-defined] + self._factory, self._path + ) + # the drain is required so that the connection_made is called + # and transport is set otherwise it is not set before the + # `assert conn.transport is not None` + # in client.py's _request method + await asyncio.sleep(0) + # other option is to manually set transport like + # `proto.transport = trans` + except OSError as exc: + if exc.errno is None and isinstance(exc, asyncio.TimeoutError): + raise + raise ClientConnectorError(req.connection_key, exc) from exc + + return cast(ResponseHandler, proto) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py new file mode 100644 index 0000000000000000000000000000000000000000..193648d4309fc9252332c0c1241ab6aafbd2fe05 --- /dev/null +++ b/aiohttp/cookiejar.py @@ -0,0 +1,522 @@ +import asyncio +import calendar +import contextlib +import datetime +import heapq +import itertools +import os # noqa +import pathlib +import pickle +import re +import time +import warnings +from collections import defaultdict +from collections.abc import Mapping +from http.cookies import BaseCookie, Morsel, SimpleCookie +from typing import ( + DefaultDict, + Dict, + Iterable, + Iterator, + List, + Optional, + Set, + Tuple, + Union, +) + +from yarl import URL + +from ._cookie_helpers import preserve_morsel_with_coded_value +from .abc import AbstractCookieJar, ClearCookiePredicate +from .helpers import is_ip_address +from .typedefs import LooseCookies, PathLike, StrOrURL + +__all__ = ("CookieJar", "DummyCookieJar") + + +CookieItem = Union[str, "Morsel[str]"] + +# We cache these string methods here as their use is in performance critical code. +_FORMAT_PATH = "{}/{}".format +_FORMAT_DOMAIN_REVERSED = "{1}.{0}".format + +# The minimum number of scheduled cookie expirations before we start cleaning up +# the expiration heap. This is a performance optimization to avoid cleaning up the +# heap too often when there are only a few scheduled expirations. +_MIN_SCHEDULED_COOKIE_EXPIRATION = 100 +_SIMPLE_COOKIE = SimpleCookie() + + +class CookieJar(AbstractCookieJar): + """Implements cookie storage adhering to RFC 6265.""" + + DATE_TOKENS_RE = re.compile( + r"[\x09\x20-\x2F\x3B-\x40\x5B-\x60\x7B-\x7E]*" + r"(?P[\x00-\x08\x0A-\x1F\d:a-zA-Z\x7F-\xFF]+)" + ) + + DATE_HMS_TIME_RE = re.compile(r"(\d{1,2}):(\d{1,2}):(\d{1,2})") + + DATE_DAY_OF_MONTH_RE = re.compile(r"(\d{1,2})") + + DATE_MONTH_RE = re.compile( + "(jan)|(feb)|(mar)|(apr)|(may)|(jun)|(jul)|(aug)|(sep)|(oct)|(nov)|(dec)", + re.I, + ) + + DATE_YEAR_RE = re.compile(r"(\d{2,4})") + + # calendar.timegm() fails for timestamps after datetime.datetime.max + # Minus one as a loss of precision occurs when timestamp() is called. + MAX_TIME = ( + int(datetime.datetime.max.replace(tzinfo=datetime.timezone.utc).timestamp()) - 1 + ) + try: + calendar.timegm(time.gmtime(MAX_TIME)) + except (OSError, ValueError): + # Hit the maximum representable time on Windows + # https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/localtime-localtime32-localtime64 + # Throws ValueError on PyPy 3.9, OSError elsewhere + MAX_TIME = calendar.timegm((3000, 12, 31, 23, 59, 59, -1, -1, -1)) + except OverflowError: + # #4515: datetime.max may not be representable on 32-bit platforms + MAX_TIME = 2**31 - 1 + # Avoid minuses in the future, 3x faster + SUB_MAX_TIME = MAX_TIME - 1 + + def __init__( + self, + *, + unsafe: bool = False, + quote_cookie: bool = True, + treat_as_secure_origin: Union[StrOrURL, List[StrOrURL], None] = None, + loop: Optional[asyncio.AbstractEventLoop] = None, + ) -> None: + super().__init__(loop=loop) + self._cookies: DefaultDict[Tuple[str, str], SimpleCookie] = defaultdict( + SimpleCookie + ) + self._morsel_cache: DefaultDict[Tuple[str, str], Dict[str, Morsel[str]]] = ( + defaultdict(dict) + ) + self._host_only_cookies: Set[Tuple[str, str]] = set() + self._unsafe = unsafe + self._quote_cookie = quote_cookie + if treat_as_secure_origin is None: + treat_as_secure_origin = [] + elif isinstance(treat_as_secure_origin, URL): + treat_as_secure_origin = [treat_as_secure_origin.origin()] + elif isinstance(treat_as_secure_origin, str): + treat_as_secure_origin = [URL(treat_as_secure_origin).origin()] + else: + treat_as_secure_origin = [ + URL(url).origin() if isinstance(url, str) else url.origin() + for url in treat_as_secure_origin + ] + self._treat_as_secure_origin = treat_as_secure_origin + self._expire_heap: List[Tuple[float, Tuple[str, str, str]]] = [] + self._expirations: Dict[Tuple[str, str, str], float] = {} + + @property + def quote_cookie(self) -> bool: + return self._quote_cookie + + def save(self, file_path: PathLike) -> None: + file_path = pathlib.Path(file_path) + with file_path.open(mode="wb") as f: + pickle.dump(self._cookies, f, pickle.HIGHEST_PROTOCOL) + + def load(self, file_path: PathLike) -> None: + file_path = pathlib.Path(file_path) + with file_path.open(mode="rb") as f: + self._cookies = pickle.load(f) + + def clear(self, predicate: Optional[ClearCookiePredicate] = None) -> None: + if predicate is None: + self._expire_heap.clear() + self._cookies.clear() + self._morsel_cache.clear() + self._host_only_cookies.clear() + self._expirations.clear() + return + + now = time.time() + to_del = [ + key + for (domain, path), cookie in self._cookies.items() + for name, morsel in cookie.items() + if ( + (key := (domain, path, name)) in self._expirations + and self._expirations[key] <= now + ) + or predicate(morsel) + ] + if to_del: + self._delete_cookies(to_del) + + def clear_domain(self, domain: str) -> None: + self.clear(lambda x: self._is_domain_match(domain, x["domain"])) + + def __iter__(self) -> "Iterator[Morsel[str]]": + self._do_expiration() + for val in self._cookies.values(): + yield from val.values() + + def __len__(self) -> int: + """Return number of cookies. + + This function does not iterate self to avoid unnecessary expiration + checks. + """ + return sum(len(cookie.values()) for cookie in self._cookies.values()) + + def _do_expiration(self) -> None: + """Remove expired cookies.""" + if not (expire_heap_len := len(self._expire_heap)): + return + + # If the expiration heap grows larger than the number expirations + # times two, we clean it up to avoid keeping expired entries in + # the heap and consuming memory. We guard this with a minimum + # threshold to avoid cleaning up the heap too often when there are + # only a few scheduled expirations. + if ( + expire_heap_len > _MIN_SCHEDULED_COOKIE_EXPIRATION + and expire_heap_len > len(self._expirations) * 2 + ): + # Remove any expired entries from the expiration heap + # that do not match the expiration time in the expirations + # as it means the cookie has been re-added to the heap + # with a different expiration time. + self._expire_heap = [ + entry + for entry in self._expire_heap + if self._expirations.get(entry[1]) == entry[0] + ] + heapq.heapify(self._expire_heap) + + now = time.time() + to_del: List[Tuple[str, str, str]] = [] + # Find any expired cookies and add them to the to-delete list + while self._expire_heap: + when, cookie_key = self._expire_heap[0] + if when > now: + break + heapq.heappop(self._expire_heap) + # Check if the cookie hasn't been re-added to the heap + # with a different expiration time as it will be removed + # later when it reaches the top of the heap and its + # expiration time is met. + if self._expirations.get(cookie_key) == when: + to_del.append(cookie_key) + + if to_del: + self._delete_cookies(to_del) + + def _delete_cookies(self, to_del: List[Tuple[str, str, str]]) -> None: + for domain, path, name in to_del: + self._host_only_cookies.discard((domain, name)) + self._cookies[(domain, path)].pop(name, None) + self._morsel_cache[(domain, path)].pop(name, None) + self._expirations.pop((domain, path, name), None) + + def _expire_cookie(self, when: float, domain: str, path: str, name: str) -> None: + cookie_key = (domain, path, name) + if self._expirations.get(cookie_key) == when: + # Avoid adding duplicates to the heap + return + heapq.heappush(self._expire_heap, (when, cookie_key)) + self._expirations[cookie_key] = when + + def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> None: + """Update cookies.""" + hostname = response_url.raw_host + + if not self._unsafe and is_ip_address(hostname): + # Don't accept cookies from IPs + return + + if isinstance(cookies, Mapping): + cookies = cookies.items() + + for name, cookie in cookies: + if not isinstance(cookie, Morsel): + tmp = SimpleCookie() + tmp[name] = cookie # type: ignore[assignment] + cookie = tmp[name] + + domain = cookie["domain"] + + # ignore domains with trailing dots + if domain and domain[-1] == ".": + domain = "" + del cookie["domain"] + + if not domain and hostname is not None: + # Set the cookie's domain to the response hostname + # and set its host-only-flag + self._host_only_cookies.add((hostname, name)) + domain = cookie["domain"] = hostname + + if domain and domain[0] == ".": + # Remove leading dot + domain = domain[1:] + cookie["domain"] = domain + + if hostname and not self._is_domain_match(domain, hostname): + # Setting cookies for different domains is not allowed + continue + + path = cookie["path"] + if not path or path[0] != "/": + # Set the cookie's path to the response path + path = response_url.path + if not path.startswith("/"): + path = "/" + else: + # Cut everything from the last slash to the end + path = "/" + path[1 : path.rfind("/")] + cookie["path"] = path + path = path.rstrip("/") + + if max_age := cookie["max-age"]: + try: + delta_seconds = int(max_age) + max_age_expiration = min(time.time() + delta_seconds, self.MAX_TIME) + self._expire_cookie(max_age_expiration, domain, path, name) + except ValueError: + cookie["max-age"] = "" + + elif expires := cookie["expires"]: + if expire_time := self._parse_date(expires): + self._expire_cookie(expire_time, domain, path, name) + else: + cookie["expires"] = "" + + key = (domain, path) + if self._cookies[key].get(name) != cookie: + # Don't blow away the cache if the same + # cookie gets set again + self._cookies[key][name] = cookie + self._morsel_cache[key].pop(name, None) + + self._do_expiration() + + def filter_cookies(self, request_url: URL = URL()) -> "BaseCookie[str]": + """Returns this jar's cookies filtered by their attributes.""" + # We always use BaseCookie now since all + # cookies set on on filtered are fully constructed + # Morsels, not just names and values. + filtered: BaseCookie[str] = BaseCookie() + if not self._cookies: + # Skip do_expiration() if there are no cookies. + return filtered + self._do_expiration() + if not self._cookies: + # Skip rest of function if no non-expired cookies. + return filtered + if type(request_url) is not URL: + warnings.warn( + "filter_cookies expects yarl.URL instances only," + f"and will stop working in 4.x, got {type(request_url)}", + DeprecationWarning, + stacklevel=2, + ) + request_url = URL(request_url) + hostname = request_url.raw_host or "" + + is_not_secure = request_url.scheme not in ("https", "wss") + if is_not_secure and self._treat_as_secure_origin: + request_origin = URL() + with contextlib.suppress(ValueError): + request_origin = request_url.origin() + is_not_secure = request_origin not in self._treat_as_secure_origin + + # Send shared cookie + key = ("", "") + for c in self._cookies[key].values(): + # Check cache first + if c.key in self._morsel_cache[key]: + filtered[c.key] = self._morsel_cache[key][c.key] + continue + + # Build and cache the morsel + mrsl_val = self._build_morsel(c) + self._morsel_cache[key][c.key] = mrsl_val + filtered[c.key] = mrsl_val + + if is_ip_address(hostname): + if not self._unsafe: + return filtered + domains: Iterable[str] = (hostname,) + else: + # Get all the subdomains that might match a cookie (e.g. "foo.bar.com", "bar.com", "com") + domains = itertools.accumulate( + reversed(hostname.split(".")), _FORMAT_DOMAIN_REVERSED + ) + + # Get all the path prefixes that might match a cookie (e.g. "", "/foo", "/foo/bar") + paths = itertools.accumulate(request_url.path.split("/"), _FORMAT_PATH) + # Create every combination of (domain, path) pairs. + pairs = itertools.product(domains, paths) + + path_len = len(request_url.path) + # Point 2: https://www.rfc-editor.org/rfc/rfc6265.html#section-5.4 + for p in pairs: + if p not in self._cookies: + continue + for name, cookie in self._cookies[p].items(): + domain = cookie["domain"] + + if (domain, name) in self._host_only_cookies and domain != hostname: + continue + + # Skip edge case when the cookie has a trailing slash but request doesn't. + if len(cookie["path"]) > path_len: + continue + + if is_not_secure and cookie["secure"]: + continue + + # We already built the Morsel so reuse it here + if name in self._morsel_cache[p]: + filtered[name] = self._morsel_cache[p][name] + continue + + # Build and cache the morsel + mrsl_val = self._build_morsel(cookie) + self._morsel_cache[p][name] = mrsl_val + filtered[name] = mrsl_val + + return filtered + + def _build_morsel(self, cookie: Morsel[str]) -> Morsel[str]: + """Build a morsel for sending, respecting quote_cookie setting.""" + if self._quote_cookie and cookie.coded_value and cookie.coded_value[0] == '"': + return preserve_morsel_with_coded_value(cookie) + morsel: Morsel[str] = Morsel() + if self._quote_cookie: + value, coded_value = _SIMPLE_COOKIE.value_encode(cookie.value) + else: + coded_value = value = cookie.value + # We use __setstate__ instead of the public set() API because it allows us to + # bypass validation and set already validated state. This is more stable than + # setting protected attributes directly and unlikely to change since it would + # break pickling. + morsel.__setstate__({"key": cookie.key, "value": value, "coded_value": coded_value}) # type: ignore[attr-defined] + return morsel + + @staticmethod + def _is_domain_match(domain: str, hostname: str) -> bool: + """Implements domain matching adhering to RFC 6265.""" + if hostname == domain: + return True + + if not hostname.endswith(domain): + return False + + non_matching = hostname[: -len(domain)] + + if not non_matching.endswith("."): + return False + + return not is_ip_address(hostname) + + @classmethod + def _parse_date(cls, date_str: str) -> Optional[int]: + """Implements date string parsing adhering to RFC 6265.""" + if not date_str: + return None + + found_time = False + found_day = False + found_month = False + found_year = False + + hour = minute = second = 0 + day = 0 + month = 0 + year = 0 + + for token_match in cls.DATE_TOKENS_RE.finditer(date_str): + + token = token_match.group("token") + + if not found_time: + time_match = cls.DATE_HMS_TIME_RE.match(token) + if time_match: + found_time = True + hour, minute, second = (int(s) for s in time_match.groups()) + continue + + if not found_day: + day_match = cls.DATE_DAY_OF_MONTH_RE.match(token) + if day_match: + found_day = True + day = int(day_match.group()) + continue + + if not found_month: + month_match = cls.DATE_MONTH_RE.match(token) + if month_match: + found_month = True + assert month_match.lastindex is not None + month = month_match.lastindex + continue + + if not found_year: + year_match = cls.DATE_YEAR_RE.match(token) + if year_match: + found_year = True + year = int(year_match.group()) + + if 70 <= year <= 99: + year += 1900 + elif 0 <= year <= 69: + year += 2000 + + if False in (found_day, found_month, found_year, found_time): + return None + + if not 1 <= day <= 31: + return None + + if year < 1601 or hour > 23 or minute > 59 or second > 59: + return None + + return calendar.timegm((year, month, day, hour, minute, second, -1, -1, -1)) + + +class DummyCookieJar(AbstractCookieJar): + """Implements a dummy cookie storage. + + It can be used with the ClientSession when no cookie processing is needed. + + """ + + def __init__(self, *, loop: Optional[asyncio.AbstractEventLoop] = None) -> None: + super().__init__(loop=loop) + + def __iter__(self) -> "Iterator[Morsel[str]]": + while False: + yield None + + def __len__(self) -> int: + return 0 + + @property + def quote_cookie(self) -> bool: + return True + + def clear(self, predicate: Optional[ClearCookiePredicate] = None) -> None: + pass + + def clear_domain(self, domain: str) -> None: + pass + + def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> None: + pass + + def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": + return SimpleCookie() diff --git a/aiohttp/formdata.py b/aiohttp/formdata.py new file mode 100644 index 0000000000000000000000000000000000000000..a5a4f603e190cdc66fe34e95a47983377f94666f --- /dev/null +++ b/aiohttp/formdata.py @@ -0,0 +1,179 @@ +import io +import warnings +from typing import Any, Iterable, List, Optional +from urllib.parse import urlencode + +from multidict import MultiDict, MultiDictProxy + +from . import hdrs, multipart, payload +from .helpers import guess_filename +from .payload import Payload + +__all__ = ("FormData",) + + +class FormData: + """Helper class for form body generation. + + Supports multipart/form-data and application/x-www-form-urlencoded. + """ + + def __init__( + self, + fields: Iterable[Any] = (), + quote_fields: bool = True, + charset: Optional[str] = None, + *, + default_to_multipart: bool = False, + ) -> None: + self._writer = multipart.MultipartWriter("form-data") + self._fields: List[Any] = [] + self._is_multipart = default_to_multipart + self._quote_fields = quote_fields + self._charset = charset + + if isinstance(fields, dict): + fields = list(fields.items()) + elif not isinstance(fields, (list, tuple)): + fields = (fields,) + self.add_fields(*fields) + + @property + def is_multipart(self) -> bool: + return self._is_multipart + + def add_field( + self, + name: str, + value: Any, + *, + content_type: Optional[str] = None, + filename: Optional[str] = None, + content_transfer_encoding: Optional[str] = None, + ) -> None: + + if isinstance(value, io.IOBase): + self._is_multipart = True + elif isinstance(value, (bytes, bytearray, memoryview)): + msg = ( + "In v4, passing bytes will no longer create a file field. " + "Please explicitly use the filename parameter or pass a BytesIO object." + ) + if filename is None and content_transfer_encoding is None: + warnings.warn(msg, DeprecationWarning) + filename = name + + type_options: MultiDict[str] = MultiDict({"name": name}) + if filename is not None and not isinstance(filename, str): + raise TypeError("filename must be an instance of str. Got: %s" % filename) + if filename is None and isinstance(value, io.IOBase): + filename = guess_filename(value, name) + if filename is not None: + type_options["filename"] = filename + self._is_multipart = True + + headers = {} + if content_type is not None: + if not isinstance(content_type, str): + raise TypeError( + "content_type must be an instance of str. Got: %s" % content_type + ) + headers[hdrs.CONTENT_TYPE] = content_type + self._is_multipart = True + if content_transfer_encoding is not None: + if not isinstance(content_transfer_encoding, str): + raise TypeError( + "content_transfer_encoding must be an instance" + " of str. Got: %s" % content_transfer_encoding + ) + msg = ( + "content_transfer_encoding is deprecated. " + "To maintain compatibility with v4 please pass a BytesPayload." + ) + warnings.warn(msg, DeprecationWarning) + self._is_multipart = True + + self._fields.append((type_options, headers, value)) + + def add_fields(self, *fields: Any) -> None: + to_add = list(fields) + + while to_add: + rec = to_add.pop(0) + + if isinstance(rec, io.IOBase): + k = guess_filename(rec, "unknown") + self.add_field(k, rec) # type: ignore[arg-type] + + elif isinstance(rec, (MultiDictProxy, MultiDict)): + to_add.extend(rec.items()) + + elif isinstance(rec, (list, tuple)) and len(rec) == 2: + k, fp = rec + self.add_field(k, fp) + + else: + raise TypeError( + "Only io.IOBase, multidict and (name, file) " + "pairs allowed, use .add_field() for passing " + "more complex parameters, got {!r}".format(rec) + ) + + def _gen_form_urlencoded(self) -> payload.BytesPayload: + # form data (x-www-form-urlencoded) + data = [] + for type_options, _, value in self._fields: + data.append((type_options["name"], value)) + + charset = self._charset if self._charset is not None else "utf-8" + + if charset == "utf-8": + content_type = "application/x-www-form-urlencoded" + else: + content_type = "application/x-www-form-urlencoded; charset=%s" % charset + + return payload.BytesPayload( + urlencode(data, doseq=True, encoding=charset).encode(), + content_type=content_type, + ) + + def _gen_form_data(self) -> multipart.MultipartWriter: + """Encode a list of fields using the multipart/form-data MIME format""" + for dispparams, headers, value in self._fields: + try: + if hdrs.CONTENT_TYPE in headers: + part = payload.get_payload( + value, + content_type=headers[hdrs.CONTENT_TYPE], + headers=headers, + encoding=self._charset, + ) + else: + part = payload.get_payload( + value, headers=headers, encoding=self._charset + ) + except Exception as exc: + raise TypeError( + "Can not serialize value type: %r\n " + "headers: %r\n value: %r" % (type(value), headers, value) + ) from exc + + if dispparams: + part.set_content_disposition( + "form-data", quote_fields=self._quote_fields, **dispparams + ) + # FIXME cgi.FieldStorage doesn't likes body parts with + # Content-Length which were sent via chunked transfer encoding + assert part.headers is not None + part.headers.popall(hdrs.CONTENT_LENGTH, None) + + self._writer.append_payload(part) + + self._fields.clear() + return self._writer + + def __call__(self) -> Payload: + if self._is_multipart: + return self._gen_form_data() + else: + return self._gen_form_urlencoded() diff --git a/aiohttp/hdrs.py b/aiohttp/hdrs.py new file mode 100644 index 0000000000000000000000000000000000000000..c8d6b35f33ae4be537d7776ce4085982618d1305 --- /dev/null +++ b/aiohttp/hdrs.py @@ -0,0 +1,121 @@ +"""HTTP Headers constants.""" + +# After changing the file content call ./tools/gen.py +# to regenerate the headers parser +import itertools +from typing import Final, Set + +from multidict import istr + +METH_ANY: Final[str] = "*" +METH_CONNECT: Final[str] = "CONNECT" +METH_HEAD: Final[str] = "HEAD" +METH_GET: Final[str] = "GET" +METH_DELETE: Final[str] = "DELETE" +METH_OPTIONS: Final[str] = "OPTIONS" +METH_PATCH: Final[str] = "PATCH" +METH_POST: Final[str] = "POST" +METH_PUT: Final[str] = "PUT" +METH_TRACE: Final[str] = "TRACE" + +METH_ALL: Final[Set[str]] = { + METH_CONNECT, + METH_HEAD, + METH_GET, + METH_DELETE, + METH_OPTIONS, + METH_PATCH, + METH_POST, + METH_PUT, + METH_TRACE, +} + +ACCEPT: Final[istr] = istr("Accept") +ACCEPT_CHARSET: Final[istr] = istr("Accept-Charset") +ACCEPT_ENCODING: Final[istr] = istr("Accept-Encoding") +ACCEPT_LANGUAGE: Final[istr] = istr("Accept-Language") +ACCEPT_RANGES: Final[istr] = istr("Accept-Ranges") +ACCESS_CONTROL_MAX_AGE: Final[istr] = istr("Access-Control-Max-Age") +ACCESS_CONTROL_ALLOW_CREDENTIALS: Final[istr] = istr("Access-Control-Allow-Credentials") +ACCESS_CONTROL_ALLOW_HEADERS: Final[istr] = istr("Access-Control-Allow-Headers") +ACCESS_CONTROL_ALLOW_METHODS: Final[istr] = istr("Access-Control-Allow-Methods") +ACCESS_CONTROL_ALLOW_ORIGIN: Final[istr] = istr("Access-Control-Allow-Origin") +ACCESS_CONTROL_EXPOSE_HEADERS: Final[istr] = istr("Access-Control-Expose-Headers") +ACCESS_CONTROL_REQUEST_HEADERS: Final[istr] = istr("Access-Control-Request-Headers") +ACCESS_CONTROL_REQUEST_METHOD: Final[istr] = istr("Access-Control-Request-Method") +AGE: Final[istr] = istr("Age") +ALLOW: Final[istr] = istr("Allow") +AUTHORIZATION: Final[istr] = istr("Authorization") +CACHE_CONTROL: Final[istr] = istr("Cache-Control") +CONNECTION: Final[istr] = istr("Connection") +CONTENT_DISPOSITION: Final[istr] = istr("Content-Disposition") +CONTENT_ENCODING: Final[istr] = istr("Content-Encoding") +CONTENT_LANGUAGE: Final[istr] = istr("Content-Language") +CONTENT_LENGTH: Final[istr] = istr("Content-Length") +CONTENT_LOCATION: Final[istr] = istr("Content-Location") +CONTENT_MD5: Final[istr] = istr("Content-MD5") +CONTENT_RANGE: Final[istr] = istr("Content-Range") +CONTENT_TRANSFER_ENCODING: Final[istr] = istr("Content-Transfer-Encoding") +CONTENT_TYPE: Final[istr] = istr("Content-Type") +COOKIE: Final[istr] = istr("Cookie") +DATE: Final[istr] = istr("Date") +DESTINATION: Final[istr] = istr("Destination") +DIGEST: Final[istr] = istr("Digest") +ETAG: Final[istr] = istr("Etag") +EXPECT: Final[istr] = istr("Expect") +EXPIRES: Final[istr] = istr("Expires") +FORWARDED: Final[istr] = istr("Forwarded") +FROM: Final[istr] = istr("From") +HOST: Final[istr] = istr("Host") +IF_MATCH: Final[istr] = istr("If-Match") +IF_MODIFIED_SINCE: Final[istr] = istr("If-Modified-Since") +IF_NONE_MATCH: Final[istr] = istr("If-None-Match") +IF_RANGE: Final[istr] = istr("If-Range") +IF_UNMODIFIED_SINCE: Final[istr] = istr("If-Unmodified-Since") +KEEP_ALIVE: Final[istr] = istr("Keep-Alive") +LAST_EVENT_ID: Final[istr] = istr("Last-Event-ID") +LAST_MODIFIED: Final[istr] = istr("Last-Modified") +LINK: Final[istr] = istr("Link") +LOCATION: Final[istr] = istr("Location") +MAX_FORWARDS: Final[istr] = istr("Max-Forwards") +ORIGIN: Final[istr] = istr("Origin") +PRAGMA: Final[istr] = istr("Pragma") +PROXY_AUTHENTICATE: Final[istr] = istr("Proxy-Authenticate") +PROXY_AUTHORIZATION: Final[istr] = istr("Proxy-Authorization") +RANGE: Final[istr] = istr("Range") +REFERER: Final[istr] = istr("Referer") +RETRY_AFTER: Final[istr] = istr("Retry-After") +SEC_WEBSOCKET_ACCEPT: Final[istr] = istr("Sec-WebSocket-Accept") +SEC_WEBSOCKET_VERSION: Final[istr] = istr("Sec-WebSocket-Version") +SEC_WEBSOCKET_PROTOCOL: Final[istr] = istr("Sec-WebSocket-Protocol") +SEC_WEBSOCKET_EXTENSIONS: Final[istr] = istr("Sec-WebSocket-Extensions") +SEC_WEBSOCKET_KEY: Final[istr] = istr("Sec-WebSocket-Key") +SEC_WEBSOCKET_KEY1: Final[istr] = istr("Sec-WebSocket-Key1") +SERVER: Final[istr] = istr("Server") +SET_COOKIE: Final[istr] = istr("Set-Cookie") +TE: Final[istr] = istr("TE") +TRAILER: Final[istr] = istr("Trailer") +TRANSFER_ENCODING: Final[istr] = istr("Transfer-Encoding") +UPGRADE: Final[istr] = istr("Upgrade") +URI: Final[istr] = istr("URI") +USER_AGENT: Final[istr] = istr("User-Agent") +VARY: Final[istr] = istr("Vary") +VIA: Final[istr] = istr("Via") +WANT_DIGEST: Final[istr] = istr("Want-Digest") +WARNING: Final[istr] = istr("Warning") +WWW_AUTHENTICATE: Final[istr] = istr("WWW-Authenticate") +X_FORWARDED_FOR: Final[istr] = istr("X-Forwarded-For") +X_FORWARDED_HOST: Final[istr] = istr("X-Forwarded-Host") +X_FORWARDED_PROTO: Final[istr] = istr("X-Forwarded-Proto") + +# These are the upper/lower case variants of the headers/methods +# Example: {'hOst', 'host', 'HoST', 'HOSt', 'hOsT', 'HosT', 'hoSt', ...} +METH_HEAD_ALL: Final = frozenset( + map("".join, itertools.product(*zip(METH_HEAD.upper(), METH_HEAD.lower()))) +) +METH_CONNECT_ALL: Final = frozenset( + map("".join, itertools.product(*zip(METH_CONNECT.upper(), METH_CONNECT.lower()))) +) +HOST_ALL: Final = frozenset( + map("".join, itertools.product(*zip(HOST.upper(), HOST.lower()))) +) diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..dfab9877d398a9c8807ad2614397b3e54316ed1f --- /dev/null +++ b/aiohttp/helpers.py @@ -0,0 +1,986 @@ +"""Various helper functions""" + +import asyncio +import base64 +import binascii +import contextlib +import datetime +import enum +import functools +import inspect +import netrc +import os +import platform +import re +import sys +import time +import weakref +from collections import namedtuple +from contextlib import suppress +from email.message import EmailMessage +from email.parser import HeaderParser +from email.policy import HTTP +from email.utils import parsedate +from math import ceil +from pathlib import Path +from types import MappingProxyType, TracebackType +from typing import ( + Any, + Callable, + ContextManager, + Dict, + Generator, + Generic, + Iterable, + Iterator, + List, + Mapping, + Optional, + Protocol, + Tuple, + Type, + TypeVar, + Union, + get_args, + overload, +) +from urllib.parse import quote +from urllib.request import getproxies, proxy_bypass + +import attr +from multidict import MultiDict, MultiDictProxy, MultiMapping +from propcache.api import under_cached_property as reify +from yarl import URL + +from . import hdrs +from .log import client_logger + +if sys.version_info >= (3, 11): + import asyncio as async_timeout +else: + import async_timeout + +__all__ = ("BasicAuth", "ChainMapProxy", "ETag", "reify") + +IS_MACOS = platform.system() == "Darwin" +IS_WINDOWS = platform.system() == "Windows" + +PY_310 = sys.version_info >= (3, 10) +PY_311 = sys.version_info >= (3, 11) + + +_T = TypeVar("_T") +_S = TypeVar("_S") + +_SENTINEL = enum.Enum("_SENTINEL", "sentinel") +sentinel = _SENTINEL.sentinel + +NO_EXTENSIONS = bool(os.environ.get("AIOHTTP_NO_EXTENSIONS")) + +# https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 +EMPTY_BODY_STATUS_CODES = frozenset((204, 304, *range(100, 200))) +# https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.1 +# https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.2 +EMPTY_BODY_METHODS = hdrs.METH_HEAD_ALL + +DEBUG = sys.flags.dev_mode or ( + not sys.flags.ignore_environment and bool(os.environ.get("PYTHONASYNCIODEBUG")) +) + + +CHAR = {chr(i) for i in range(0, 128)} +CTL = {chr(i) for i in range(0, 32)} | { + chr(127), +} +SEPARATORS = { + "(", + ")", + "<", + ">", + "@", + ",", + ";", + ":", + "\\", + '"', + "/", + "[", + "]", + "?", + "=", + "{", + "}", + " ", + chr(9), +} +TOKEN = CHAR ^ CTL ^ SEPARATORS + + +class noop: + def __await__(self) -> Generator[None, None, None]: + yield + + +class BasicAuth(namedtuple("BasicAuth", ["login", "password", "encoding"])): + """Http basic authentication helper.""" + + def __new__( + cls, login: str, password: str = "", encoding: str = "latin1" + ) -> "BasicAuth": + if login is None: + raise ValueError("None is not allowed as login value") + + if password is None: + raise ValueError("None is not allowed as password value") + + if ":" in login: + raise ValueError('A ":" is not allowed in login (RFC 1945#section-11.1)') + + return super().__new__(cls, login, password, encoding) + + @classmethod + def decode(cls, auth_header: str, encoding: str = "latin1") -> "BasicAuth": + """Create a BasicAuth object from an Authorization HTTP header.""" + try: + auth_type, encoded_credentials = auth_header.split(" ", 1) + except ValueError: + raise ValueError("Could not parse authorization header.") + + if auth_type.lower() != "basic": + raise ValueError("Unknown authorization method %s" % auth_type) + + try: + decoded = base64.b64decode( + encoded_credentials.encode("ascii"), validate=True + ).decode(encoding) + except binascii.Error: + raise ValueError("Invalid base64 encoding.") + + try: + # RFC 2617 HTTP Authentication + # https://www.ietf.org/rfc/rfc2617.txt + # the colon must be present, but the username and password may be + # otherwise blank. + username, password = decoded.split(":", 1) + except ValueError: + raise ValueError("Invalid credentials.") + + return cls(username, password, encoding=encoding) + + @classmethod + def from_url(cls, url: URL, *, encoding: str = "latin1") -> Optional["BasicAuth"]: + """Create BasicAuth from url.""" + if not isinstance(url, URL): + raise TypeError("url should be yarl.URL instance") + # Check raw_user and raw_password first as yarl is likely + # to already have these values parsed from the netloc in the cache. + if url.raw_user is None and url.raw_password is None: + return None + return cls(url.user or "", url.password or "", encoding=encoding) + + def encode(self) -> str: + """Encode credentials.""" + creds = (f"{self.login}:{self.password}").encode(self.encoding) + return "Basic %s" % base64.b64encode(creds).decode(self.encoding) + + +def strip_auth_from_url(url: URL) -> Tuple[URL, Optional[BasicAuth]]: + """Remove user and password from URL if present and return BasicAuth object.""" + # Check raw_user and raw_password first as yarl is likely + # to already have these values parsed from the netloc in the cache. + if url.raw_user is None and url.raw_password is None: + return url, None + return url.with_user(None), BasicAuth(url.user or "", url.password or "") + + +def netrc_from_env() -> Optional[netrc.netrc]: + """Load netrc from file. + + Attempt to load it from the path specified by the env-var + NETRC or in the default location in the user's home directory. + + Returns None if it couldn't be found or fails to parse. + """ + netrc_env = os.environ.get("NETRC") + + if netrc_env is not None: + netrc_path = Path(netrc_env) + else: + try: + home_dir = Path.home() + except RuntimeError as e: # pragma: no cover + # if pathlib can't resolve home, it may raise a RuntimeError + client_logger.debug( + "Could not resolve home directory when " + "trying to look for .netrc file: %s", + e, + ) + return None + + netrc_path = home_dir / ("_netrc" if IS_WINDOWS else ".netrc") + + try: + return netrc.netrc(str(netrc_path)) + except netrc.NetrcParseError as e: + client_logger.warning("Could not parse .netrc file: %s", e) + except OSError as e: + netrc_exists = False + with contextlib.suppress(OSError): + netrc_exists = netrc_path.is_file() + # we couldn't read the file (doesn't exist, permissions, etc.) + if netrc_env or netrc_exists: + # only warn if the environment wanted us to load it, + # or it appears like the default file does actually exist + client_logger.warning("Could not read .netrc file: %s", e) + + return None + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class ProxyInfo: + proxy: URL + proxy_auth: Optional[BasicAuth] + + +def basicauth_from_netrc(netrc_obj: Optional[netrc.netrc], host: str) -> BasicAuth: + """ + Return :py:class:`~aiohttp.BasicAuth` credentials for ``host`` from ``netrc_obj``. + + :raises LookupError: if ``netrc_obj`` is :py:data:`None` or if no + entry is found for the ``host``. + """ + if netrc_obj is None: + raise LookupError("No .netrc file found") + auth_from_netrc = netrc_obj.authenticators(host) + + if auth_from_netrc is None: + raise LookupError(f"No entry for {host!s} found in the `.netrc` file.") + login, account, password = auth_from_netrc + + # TODO(PY311): username = login or account + # Up to python 3.10, account could be None if not specified, + # and login will be empty string if not specified. From 3.11, + # login and account will be empty string if not specified. + username = login if (login or account is None) else account + + # TODO(PY311): Remove this, as password will be empty string + # if not specified + if password is None: + password = "" + + return BasicAuth(username, password) + + +def proxies_from_env() -> Dict[str, ProxyInfo]: + proxy_urls = { + k: URL(v) + for k, v in getproxies().items() + if k in ("http", "https", "ws", "wss") + } + netrc_obj = netrc_from_env() + stripped = {k: strip_auth_from_url(v) for k, v in proxy_urls.items()} + ret = {} + for proto, val in stripped.items(): + proxy, auth = val + if proxy.scheme in ("https", "wss"): + client_logger.warning( + "%s proxies %s are not supported, ignoring", proxy.scheme.upper(), proxy + ) + continue + if netrc_obj and auth is None: + if proxy.host is not None: + try: + auth = basicauth_from_netrc(netrc_obj, proxy.host) + except LookupError: + auth = None + ret[proto] = ProxyInfo(proxy, auth) + return ret + + +def get_env_proxy_for_url(url: URL) -> Tuple[URL, Optional[BasicAuth]]: + """Get a permitted proxy for the given URL from the env.""" + if url.host is not None and proxy_bypass(url.host): + raise LookupError(f"Proxying is disallowed for `{url.host!r}`") + + proxies_in_env = proxies_from_env() + try: + proxy_info = proxies_in_env[url.scheme] + except KeyError: + raise LookupError(f"No proxies found for `{url!s}` in the env") + else: + return proxy_info.proxy, proxy_info.proxy_auth + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class MimeType: + type: str + subtype: str + suffix: str + parameters: "MultiDictProxy[str]" + + +@functools.lru_cache(maxsize=56) +def parse_mimetype(mimetype: str) -> MimeType: + """Parses a MIME type into its components. + + mimetype is a MIME type string. + + Returns a MimeType object. + + Example: + + >>> parse_mimetype('text/html; charset=utf-8') + MimeType(type='text', subtype='html', suffix='', + parameters={'charset': 'utf-8'}) + + """ + if not mimetype: + return MimeType( + type="", subtype="", suffix="", parameters=MultiDictProxy(MultiDict()) + ) + + parts = mimetype.split(";") + params: MultiDict[str] = MultiDict() + for item in parts[1:]: + if not item: + continue + key, _, value = item.partition("=") + params.add(key.lower().strip(), value.strip(' "')) + + fulltype = parts[0].strip().lower() + if fulltype == "*": + fulltype = "*/*" + + mtype, _, stype = fulltype.partition("/") + stype, _, suffix = stype.partition("+") + + return MimeType( + type=mtype, subtype=stype, suffix=suffix, parameters=MultiDictProxy(params) + ) + + +class EnsureOctetStream(EmailMessage): + def __init__(self) -> None: + super().__init__() + # https://www.rfc-editor.org/rfc/rfc9110#section-8.3-5 + self.set_default_type("application/octet-stream") + + def get_content_type(self) -> str: + """Re-implementation from Message + + Returns application/octet-stream in place of plain/text when + value is wrong. + + The way this class is used guarantees that content-type will + be present so simplify the checks wrt to the base implementation. + """ + value = self.get("content-type", "").lower() + + # Based on the implementation of _splitparam in the standard library + ctype, _, _ = value.partition(";") + ctype = ctype.strip() + if ctype.count("/") != 1: + return self.get_default_type() + return ctype + + +@functools.lru_cache(maxsize=56) +def parse_content_type(raw: str) -> Tuple[str, MappingProxyType[str, str]]: + """Parse Content-Type header. + + Returns a tuple of the parsed content type and a + MappingProxyType of parameters. The default returned value + is `application/octet-stream` + """ + msg = HeaderParser(EnsureOctetStream, policy=HTTP).parsestr(f"Content-Type: {raw}") + content_type = msg.get_content_type() + params = msg.get_params(()) + content_dict = dict(params[1:]) # First element is content type again + return content_type, MappingProxyType(content_dict) + + +def guess_filename(obj: Any, default: Optional[str] = None) -> Optional[str]: + name = getattr(obj, "name", None) + if name and isinstance(name, str) and name[0] != "<" and name[-1] != ">": + return Path(name).name + return default + + +not_qtext_re = re.compile(r"[^\041\043-\133\135-\176]") +QCONTENT = {chr(i) for i in range(0x20, 0x7F)} | {"\t"} + + +def quoted_string(content: str) -> str: + """Return 7-bit content as quoted-string. + + Format content into a quoted-string as defined in RFC5322 for + Internet Message Format. Notice that this is not the 8-bit HTTP + format, but the 7-bit email format. Content must be in usascii or + a ValueError is raised. + """ + if not (QCONTENT > set(content)): + raise ValueError(f"bad content for quoted-string {content!r}") + return not_qtext_re.sub(lambda x: "\\" + x.group(0), content) + + +def content_disposition_header( + disptype: str, quote_fields: bool = True, _charset: str = "utf-8", **params: str +) -> str: + """Sets ``Content-Disposition`` header for MIME. + + This is the MIME payload Content-Disposition header from RFC 2183 + and RFC 7579 section 4.2, not the HTTP Content-Disposition from + RFC 6266. + + disptype is a disposition type: inline, attachment, form-data. + Should be valid extension token (see RFC 2183) + + quote_fields performs value quoting to 7-bit MIME headers + according to RFC 7578. Set to quote_fields to False if recipient + can take 8-bit file names and field values. + + _charset specifies the charset to use when quote_fields is True. + + params is a dict with disposition params. + """ + if not disptype or not (TOKEN > set(disptype)): + raise ValueError(f"bad content disposition type {disptype!r}") + + value = disptype + if params: + lparams = [] + for key, val in params.items(): + if not key or not (TOKEN > set(key)): + raise ValueError(f"bad content disposition parameter {key!r}={val!r}") + if quote_fields: + if key.lower() == "filename": + qval = quote(val, "", encoding=_charset) + lparams.append((key, '"%s"' % qval)) + else: + try: + qval = quoted_string(val) + except ValueError: + qval = "".join( + (_charset, "''", quote(val, "", encoding=_charset)) + ) + lparams.append((key + "*", qval)) + else: + lparams.append((key, '"%s"' % qval)) + else: + qval = val.replace("\\", "\\\\").replace('"', '\\"') + lparams.append((key, '"%s"' % qval)) + sparams = "; ".join("=".join(pair) for pair in lparams) + value = "; ".join((value, sparams)) + return value + + +def is_ip_address(host: Optional[str]) -> bool: + """Check if host looks like an IP Address. + + This check is only meant as a heuristic to ensure that + a host is not a domain name. + """ + if not host: + return False + # For a host to be an ipv4 address, it must be all numeric. + # The host must contain a colon to be an IPv6 address. + return ":" in host or host.replace(".", "").isdigit() + + +_cached_current_datetime: Optional[int] = None +_cached_formatted_datetime = "" + + +def rfc822_formatted_time() -> str: + global _cached_current_datetime + global _cached_formatted_datetime + + now = int(time.time()) + if now != _cached_current_datetime: + # Weekday and month names for HTTP date/time formatting; + # always English! + # Tuples are constants stored in codeobject! + _weekdayname = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun") + _monthname = ( + "", # Dummy so we can use 1-based month numbers + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + ) + + year, month, day, hh, mm, ss, wd, *tail = time.gmtime(now) + _cached_formatted_datetime = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( + _weekdayname[wd], + day, + _monthname[month], + year, + hh, + mm, + ss, + ) + _cached_current_datetime = now + return _cached_formatted_datetime + + +def _weakref_handle(info: "Tuple[weakref.ref[object], str]") -> None: + ref, name = info + ob = ref() + if ob is not None: + with suppress(Exception): + getattr(ob, name)() + + +def weakref_handle( + ob: object, + name: str, + timeout: float, + loop: asyncio.AbstractEventLoop, + timeout_ceil_threshold: float = 5, +) -> Optional[asyncio.TimerHandle]: + if timeout is not None and timeout > 0: + when = loop.time() + timeout + if timeout >= timeout_ceil_threshold: + when = ceil(when) + + return loop.call_at(when, _weakref_handle, (weakref.ref(ob), name)) + return None + + +def call_later( + cb: Callable[[], Any], + timeout: float, + loop: asyncio.AbstractEventLoop, + timeout_ceil_threshold: float = 5, +) -> Optional[asyncio.TimerHandle]: + if timeout is None or timeout <= 0: + return None + now = loop.time() + when = calculate_timeout_when(now, timeout, timeout_ceil_threshold) + return loop.call_at(when, cb) + + +def calculate_timeout_when( + loop_time: float, + timeout: float, + timeout_ceiling_threshold: float, +) -> float: + """Calculate when to execute a timeout.""" + when = loop_time + timeout + if timeout > timeout_ceiling_threshold: + return ceil(when) + return when + + +class TimeoutHandle: + """Timeout handle""" + + __slots__ = ("_timeout", "_loop", "_ceil_threshold", "_callbacks") + + def __init__( + self, + loop: asyncio.AbstractEventLoop, + timeout: Optional[float], + ceil_threshold: float = 5, + ) -> None: + self._timeout = timeout + self._loop = loop + self._ceil_threshold = ceil_threshold + self._callbacks: List[ + Tuple[Callable[..., None], Tuple[Any, ...], Dict[str, Any]] + ] = [] + + def register( + self, callback: Callable[..., None], *args: Any, **kwargs: Any + ) -> None: + self._callbacks.append((callback, args, kwargs)) + + def close(self) -> None: + self._callbacks.clear() + + def start(self) -> Optional[asyncio.TimerHandle]: + timeout = self._timeout + if timeout is not None and timeout > 0: + when = self._loop.time() + timeout + if timeout >= self._ceil_threshold: + when = ceil(when) + return self._loop.call_at(when, self.__call__) + else: + return None + + def timer(self) -> "BaseTimerContext": + if self._timeout is not None and self._timeout > 0: + timer = TimerContext(self._loop) + self.register(timer.timeout) + return timer + else: + return TimerNoop() + + def __call__(self) -> None: + for cb, args, kwargs in self._callbacks: + with suppress(Exception): + cb(*args, **kwargs) + + self._callbacks.clear() + + +class BaseTimerContext(ContextManager["BaseTimerContext"]): + + __slots__ = () + + def assert_timeout(self) -> None: + """Raise TimeoutError if timeout has been exceeded.""" + + +class TimerNoop(BaseTimerContext): + + __slots__ = () + + def __enter__(self) -> BaseTimerContext: + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + return + + +class TimerContext(BaseTimerContext): + """Low resolution timeout context manager""" + + __slots__ = ("_loop", "_tasks", "_cancelled", "_cancelling") + + def __init__(self, loop: asyncio.AbstractEventLoop) -> None: + self._loop = loop + self._tasks: List[asyncio.Task[Any]] = [] + self._cancelled = False + self._cancelling = 0 + + def assert_timeout(self) -> None: + """Raise TimeoutError if timer has already been cancelled.""" + if self._cancelled: + raise asyncio.TimeoutError from None + + def __enter__(self) -> BaseTimerContext: + task = asyncio.current_task(loop=self._loop) + if task is None: + raise RuntimeError("Timeout context manager should be used inside a task") + + if sys.version_info >= (3, 11): + # Remember if the task was already cancelling + # so when we __exit__ we can decide if we should + # raise asyncio.TimeoutError or let the cancellation propagate + self._cancelling = task.cancelling() + + if self._cancelled: + raise asyncio.TimeoutError from None + + self._tasks.append(task) + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> Optional[bool]: + enter_task: Optional[asyncio.Task[Any]] = None + if self._tasks: + enter_task = self._tasks.pop() + + if exc_type is asyncio.CancelledError and self._cancelled: + assert enter_task is not None + # The timeout was hit, and the task was cancelled + # so we need to uncancel the last task that entered the context manager + # since the cancellation should not leak out of the context manager + if sys.version_info >= (3, 11): + # If the task was already cancelling don't raise + # asyncio.TimeoutError and instead return None + # to allow the cancellation to propagate + if enter_task.uncancel() > self._cancelling: + return None + raise asyncio.TimeoutError from exc_val + return None + + def timeout(self) -> None: + if not self._cancelled: + for task in set(self._tasks): + task.cancel() + + self._cancelled = True + + +def ceil_timeout( + delay: Optional[float], ceil_threshold: float = 5 +) -> async_timeout.Timeout: + if delay is None or delay <= 0: + return async_timeout.timeout(None) + + loop = asyncio.get_running_loop() + now = loop.time() + when = now + delay + if delay > ceil_threshold: + when = ceil(when) + return async_timeout.timeout_at(when) + + +class HeadersMixin: + """Mixin for handling headers.""" + + ATTRS = frozenset(["_content_type", "_content_dict", "_stored_content_type"]) + + _headers: MultiMapping[str] + _content_type: Optional[str] = None + _content_dict: Optional[Dict[str, str]] = None + _stored_content_type: Union[str, None, _SENTINEL] = sentinel + + def _parse_content_type(self, raw: Optional[str]) -> None: + self._stored_content_type = raw + if raw is None: + # default value according to RFC 2616 + self._content_type = "application/octet-stream" + self._content_dict = {} + else: + content_type, content_mapping_proxy = parse_content_type(raw) + self._content_type = content_type + # _content_dict needs to be mutable so we can update it + self._content_dict = content_mapping_proxy.copy() + + @property + def content_type(self) -> str: + """The value of content part for Content-Type HTTP header.""" + raw = self._headers.get(hdrs.CONTENT_TYPE) + if self._stored_content_type != raw: + self._parse_content_type(raw) + assert self._content_type is not None + return self._content_type + + @property + def charset(self) -> Optional[str]: + """The value of charset part for Content-Type HTTP header.""" + raw = self._headers.get(hdrs.CONTENT_TYPE) + if self._stored_content_type != raw: + self._parse_content_type(raw) + assert self._content_dict is not None + return self._content_dict.get("charset") + + @property + def content_length(self) -> Optional[int]: + """The value of Content-Length HTTP header.""" + content_length = self._headers.get(hdrs.CONTENT_LENGTH) + return None if content_length is None else int(content_length) + + +def set_result(fut: "asyncio.Future[_T]", result: _T) -> None: + if not fut.done(): + fut.set_result(result) + + +_EXC_SENTINEL = BaseException() + + +class ErrorableProtocol(Protocol): + def set_exception( + self, + exc: BaseException, + exc_cause: BaseException = ..., + ) -> None: ... # pragma: no cover + + +def set_exception( + fut: "asyncio.Future[_T] | ErrorableProtocol", + exc: BaseException, + exc_cause: BaseException = _EXC_SENTINEL, +) -> None: + """Set future exception. + + If the future is marked as complete, this function is a no-op. + + :param exc_cause: An exception that is a direct cause of ``exc``. + Only set if provided. + """ + if asyncio.isfuture(fut) and fut.done(): + return + + exc_is_sentinel = exc_cause is _EXC_SENTINEL + exc_causes_itself = exc is exc_cause + if not exc_is_sentinel and not exc_causes_itself: + exc.__cause__ = exc_cause + + fut.set_exception(exc) + + +@functools.total_ordering +class AppKey(Generic[_T]): + """Keys for static typing support in Application.""" + + __slots__ = ("_name", "_t", "__orig_class__") + + # This may be set by Python when instantiating with a generic type. We need to + # support this, in order to support types that are not concrete classes, + # like Iterable, which can't be passed as the second parameter to __init__. + __orig_class__: Type[object] + + def __init__(self, name: str, t: Optional[Type[_T]] = None): + # Prefix with module name to help deduplicate key names. + frame = inspect.currentframe() + while frame: + if frame.f_code.co_name == "": + module: str = frame.f_globals["__name__"] + break + frame = frame.f_back + + self._name = module + "." + name + self._t = t + + def __lt__(self, other: object) -> bool: + if isinstance(other, AppKey): + return self._name < other._name + return True # Order AppKey above other types. + + def __repr__(self) -> str: + t = self._t + if t is None: + with suppress(AttributeError): + # Set to type arg. + t = get_args(self.__orig_class__)[0] + + if t is None: + t_repr = "<>" + elif isinstance(t, type): + if t.__module__ == "builtins": + t_repr = t.__qualname__ + else: + t_repr = f"{t.__module__}.{t.__qualname__}" + else: + t_repr = repr(t) + return f"" + + +class ChainMapProxy(Mapping[Union[str, AppKey[Any]], Any]): + __slots__ = ("_maps",) + + def __init__(self, maps: Iterable[Mapping[Union[str, AppKey[Any]], Any]]) -> None: + self._maps = tuple(maps) + + def __init_subclass__(cls) -> None: + raise TypeError( + "Inheritance class {} from ChainMapProxy " + "is forbidden".format(cls.__name__) + ) + + @overload # type: ignore[override] + def __getitem__(self, key: AppKey[_T]) -> _T: ... + + @overload + def __getitem__(self, key: str) -> Any: ... + + def __getitem__(self, key: Union[str, AppKey[_T]]) -> Any: + for mapping in self._maps: + try: + return mapping[key] + except KeyError: + pass + raise KeyError(key) + + @overload # type: ignore[override] + def get(self, key: AppKey[_T], default: _S) -> Union[_T, _S]: ... + + @overload + def get(self, key: AppKey[_T], default: None = ...) -> Optional[_T]: ... + + @overload + def get(self, key: str, default: Any = ...) -> Any: ... + + def get(self, key: Union[str, AppKey[_T]], default: Any = None) -> Any: + try: + return self[key] + except KeyError: + return default + + def __len__(self) -> int: + # reuses stored hash values if possible + return len(set().union(*self._maps)) + + def __iter__(self) -> Iterator[Union[str, AppKey[Any]]]: + d: Dict[Union[str, AppKey[Any]], Any] = {} + for mapping in reversed(self._maps): + # reuses stored hash values if possible + d.update(mapping) + return iter(d) + + def __contains__(self, key: object) -> bool: + return any(key in m for m in self._maps) + + def __bool__(self) -> bool: + return any(self._maps) + + def __repr__(self) -> str: + content = ", ".join(map(repr, self._maps)) + return f"ChainMapProxy({content})" + + +# https://tools.ietf.org/html/rfc7232#section-2.3 +_ETAGC = r"[!\x23-\x7E\x80-\xff]+" +_ETAGC_RE = re.compile(_ETAGC) +_QUOTED_ETAG = rf'(W/)?"({_ETAGC})"' +QUOTED_ETAG_RE = re.compile(_QUOTED_ETAG) +LIST_QUOTED_ETAG_RE = re.compile(rf"({_QUOTED_ETAG})(?:\s*,\s*|$)|(.)") + +ETAG_ANY = "*" + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class ETag: + value: str + is_weak: bool = False + + +def validate_etag_value(value: str) -> None: + if value != ETAG_ANY and not _ETAGC_RE.fullmatch(value): + raise ValueError( + f"Value {value!r} is not a valid etag. Maybe it contains '\"'?" + ) + + +def parse_http_date(date_str: Optional[str]) -> Optional[datetime.datetime]: + """Process a date string, return a datetime object""" + if date_str is not None: + timetuple = parsedate(date_str) + if timetuple is not None: + with suppress(ValueError): + return datetime.datetime(*timetuple[:6], tzinfo=datetime.timezone.utc) + return None + + +@functools.lru_cache +def must_be_empty_body(method: str, code: int) -> bool: + """Check if a request must return an empty body.""" + return ( + code in EMPTY_BODY_STATUS_CODES + or method in EMPTY_BODY_METHODS + or (200 <= code < 300 and method in hdrs.METH_CONNECT_ALL) + ) + + +def should_remove_content_length(method: str, code: int) -> bool: + """Check if a Content-Length header should be removed. + + This should always be a subset of must_be_empty_body + """ + # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.6-8 + # https://www.rfc-editor.org/rfc/rfc9110.html#section-15.4.5-4 + return code in EMPTY_BODY_STATUS_CODES or ( + 200 <= code < 300 and method in hdrs.METH_CONNECT_ALL + ) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..393e76a15865f3f536a4483ab7a3b1cfcd0702e5 --- /dev/null +++ b/aiohttp/http_parser.py @@ -0,0 +1,1086 @@ +import abc +import asyncio +import re +import string +from contextlib import suppress +from enum import IntEnum +from typing import ( + Any, + ClassVar, + Final, + Generic, + List, + Literal, + NamedTuple, + Optional, + Pattern, + Set, + Tuple, + Type, + TypeVar, + Union, +) + +from multidict import CIMultiDict, CIMultiDictProxy, istr +from yarl import URL + +from . import hdrs +from .base_protocol import BaseProtocol +from .compression_utils import ( + DEFAULT_MAX_DECOMPRESS_SIZE, + HAS_BROTLI, + HAS_ZSTD, + BrotliDecompressor, + ZLibDecompressor, + ZSTDDecompressor, +) +from .helpers import ( + _EXC_SENTINEL, + DEBUG, + EMPTY_BODY_METHODS, + EMPTY_BODY_STATUS_CODES, + NO_EXTENSIONS, + BaseTimerContext, + set_exception, +) +from .http_exceptions import ( + BadHttpMessage, + BadHttpMethod, + BadStatusLine, + ContentEncodingError, + ContentLengthError, + DecompressSizeError, + InvalidHeader, + InvalidURLError, + LineTooLong, + TransferEncodingError, +) +from .http_writer import HttpVersion, HttpVersion10 +from .streams import EMPTY_PAYLOAD, StreamReader +from .typedefs import RawHeaders + +__all__ = ( + "HeadersParser", + "HttpParser", + "HttpRequestParser", + "HttpResponseParser", + "RawRequestMessage", + "RawResponseMessage", +) + +_SEP = Literal[b"\r\n", b"\n"] + +ASCIISET: Final[Set[str]] = set(string.printable) + +# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview +# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens +# +# method = token +# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / +# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA +# token = 1*tchar +_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~") +TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+") +VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII) +DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII) +HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+") + + +class RawRequestMessage(NamedTuple): + method: str + path: str + version: HttpVersion + headers: "CIMultiDictProxy[str]" + raw_headers: RawHeaders + should_close: bool + compression: Optional[str] + upgrade: bool + chunked: bool + url: URL + + +class RawResponseMessage(NamedTuple): + version: HttpVersion + code: int + reason: str + headers: CIMultiDictProxy[str] + raw_headers: RawHeaders + should_close: bool + compression: Optional[str] + upgrade: bool + chunked: bool + + +_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage) + + +class ParseState(IntEnum): + + PARSE_NONE = 0 + PARSE_LENGTH = 1 + PARSE_CHUNKED = 2 + PARSE_UNTIL_EOF = 3 + + +class ChunkState(IntEnum): + PARSE_CHUNKED_SIZE = 0 + PARSE_CHUNKED_CHUNK = 1 + PARSE_CHUNKED_CHUNK_EOF = 2 + PARSE_MAYBE_TRAILERS = 3 + PARSE_TRAILERS = 4 + + +class HeadersParser: + def __init__( + self, + max_line_size: int = 8190, + max_headers: int = 32768, + max_field_size: int = 8190, + lax: bool = False, + ) -> None: + self.max_line_size = max_line_size + self.max_headers = max_headers + self.max_field_size = max_field_size + self._lax = lax + + def parse_headers( + self, lines: List[bytes] + ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]: + headers: CIMultiDict[str] = CIMultiDict() + # note: "raw" does not mean inclusion of OWS before/after the field value + raw_headers = [] + + lines_idx = 0 + line = lines[lines_idx] + line_count = len(lines) + + while line: + # Parse initial header name : value pair. + try: + bname, bvalue = line.split(b":", 1) + except ValueError: + raise InvalidHeader(line) from None + + if len(bname) == 0: + raise InvalidHeader(bname) + + # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2 + if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"} + raise InvalidHeader(line) + + bvalue = bvalue.lstrip(b" \t") + if len(bname) > self.max_field_size: + raise LineTooLong( + "request header name {}".format( + bname.decode("utf8", "backslashreplace") + ), + str(self.max_field_size), + str(len(bname)), + ) + name = bname.decode("utf-8", "surrogateescape") + if not TOKENRE.fullmatch(name): + raise InvalidHeader(bname) + + header_length = len(bvalue) + + # next line + lines_idx += 1 + line = lines[lines_idx] + + # consume continuation lines + continuation = self._lax and line and line[0] in (32, 9) # (' ', '\t') + + # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding + if continuation: + bvalue_lst = [bvalue] + while continuation: + header_length += len(line) + if header_length > self.max_field_size: + raise LineTooLong( + "request header field {}".format( + bname.decode("utf8", "backslashreplace") + ), + str(self.max_field_size), + str(header_length), + ) + bvalue_lst.append(line) + + # next line + lines_idx += 1 + if lines_idx < line_count: + line = lines[lines_idx] + if line: + continuation = line[0] in (32, 9) # (' ', '\t') + else: + line = b"" + break + bvalue = b"".join(bvalue_lst) + else: + if header_length > self.max_field_size: + raise LineTooLong( + "request header field {}".format( + bname.decode("utf8", "backslashreplace") + ), + str(self.max_field_size), + str(header_length), + ) + + bvalue = bvalue.strip(b" \t") + value = bvalue.decode("utf-8", "surrogateescape") + + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 + if "\n" in value or "\r" in value or "\x00" in value: + raise InvalidHeader(bvalue) + + headers.add(name, value) + raw_headers.append((bname, bvalue)) + + return (CIMultiDictProxy(headers), tuple(raw_headers)) + + +def _is_supported_upgrade(headers: CIMultiDictProxy[str]) -> bool: + """Check if the upgrade header is supported.""" + u = headers.get(hdrs.UPGRADE, "") + # .lower() can transform non-ascii characters. + return u.isascii() and u.lower() in {"tcp", "websocket"} + + +class HttpParser(abc.ABC, Generic[_MsgT]): + lax: ClassVar[bool] = False + + def __init__( + self, + protocol: Optional[BaseProtocol] = None, + loop: Optional[asyncio.AbstractEventLoop] = None, + limit: int = 2**16, + max_line_size: int = 8190, + max_headers: int = 32768, + max_field_size: int = 8190, + timer: Optional[BaseTimerContext] = None, + code: Optional[int] = None, + method: Optional[str] = None, + payload_exception: Optional[Type[BaseException]] = None, + response_with_body: bool = True, + read_until_eof: bool = False, + auto_decompress: bool = True, + ) -> None: + self.protocol = protocol + self.loop = loop + self.max_line_size = max_line_size + self.max_headers = max_headers + self.max_field_size = max_field_size + self.timer = timer + self.code = code + self.method = method + self.payload_exception = payload_exception + self.response_with_body = response_with_body + self.read_until_eof = read_until_eof + + self._lines: List[bytes] = [] + self._tail = b"" + self._upgraded = False + self._payload = None + self._payload_parser: Optional[HttpPayloadParser] = None + self._auto_decompress = auto_decompress + self._limit = limit + self._headers_parser = HeadersParser( + max_line_size, max_headers, max_field_size, self.lax + ) + + @abc.abstractmethod + def parse_message(self, lines: List[bytes]) -> _MsgT: ... + + @abc.abstractmethod + def _is_chunked_te(self, te: str) -> bool: ... + + def feed_eof(self) -> Optional[_MsgT]: + if self._payload_parser is not None: + self._payload_parser.feed_eof() + self._payload_parser = None + else: + # try to extract partial message + if self._tail: + self._lines.append(self._tail) + + if self._lines: + if self._lines[-1] != "\r\n": + self._lines.append(b"") + with suppress(Exception): + return self.parse_message(self._lines) + return None + + def feed_data( + self, + data: bytes, + SEP: _SEP = b"\r\n", + EMPTY: bytes = b"", + CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH, + METH_CONNECT: str = hdrs.METH_CONNECT, + SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1, + ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]: + + messages = [] + + if self._tail: + data, self._tail = self._tail + data, b"" + + data_len = len(data) + start_pos = 0 + loop = self.loop + + should_close = False + while start_pos < data_len: + + # read HTTP message (request/response line + headers), \r\n\r\n + # and split by lines + if self._payload_parser is None and not self._upgraded: + pos = data.find(SEP, start_pos) + # consume \r\n + if pos == start_pos and not self._lines: + start_pos = pos + len(SEP) + continue + + if pos >= start_pos: + if should_close: + raise BadHttpMessage("Data after `Connection: close`") + + # line found + line = data[start_pos:pos] + if SEP == b"\n": # For lax response parsing + line = line.rstrip(b"\r") + self._lines.append(line) + start_pos = pos + len(SEP) + + # \r\n\r\n found + if self._lines[-1] == EMPTY: + try: + msg: _MsgT = self.parse_message(self._lines) + finally: + self._lines.clear() + + def get_content_length() -> Optional[int]: + # payload length + length_hdr = msg.headers.get(CONTENT_LENGTH) + if length_hdr is None: + return None + + # Shouldn't allow +/- or other number formats. + # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2 + # msg.headers is already stripped of leading/trailing wsp + if not DIGITS.fullmatch(length_hdr): + raise InvalidHeader(CONTENT_LENGTH) + + return int(length_hdr) + + length = get_content_length() + # do not support old websocket spec + if SEC_WEBSOCKET_KEY1 in msg.headers: + raise InvalidHeader(SEC_WEBSOCKET_KEY1) + + self._upgraded = msg.upgrade and _is_supported_upgrade( + msg.headers + ) + + method = getattr(msg, "method", self.method) + # code is only present on responses + code = getattr(msg, "code", 0) + + assert self.protocol is not None + # calculate payload + empty_body = code in EMPTY_BODY_STATUS_CODES or bool( + method and method in EMPTY_BODY_METHODS + ) + if not empty_body and ( + ((length is not None and length > 0) or msg.chunked) + and not self._upgraded + ): + payload = StreamReader( + self.protocol, + timer=self.timer, + loop=loop, + limit=self._limit, + ) + payload_parser = HttpPayloadParser( + payload, + length=length, + chunked=msg.chunked, + method=method, + compression=msg.compression, + code=self.code, + response_with_body=self.response_with_body, + auto_decompress=self._auto_decompress, + lax=self.lax, + headers_parser=self._headers_parser, + ) + if not payload_parser.done: + self._payload_parser = payload_parser + elif method == METH_CONNECT: + assert isinstance(msg, RawRequestMessage) + payload = StreamReader( + self.protocol, + timer=self.timer, + loop=loop, + limit=self._limit, + ) + self._upgraded = True + self._payload_parser = HttpPayloadParser( + payload, + method=msg.method, + compression=msg.compression, + auto_decompress=self._auto_decompress, + lax=self.lax, + headers_parser=self._headers_parser, + ) + elif not empty_body and length is None and self.read_until_eof: + payload = StreamReader( + self.protocol, + timer=self.timer, + loop=loop, + limit=self._limit, + ) + payload_parser = HttpPayloadParser( + payload, + length=length, + chunked=msg.chunked, + method=method, + compression=msg.compression, + code=self.code, + response_with_body=self.response_with_body, + auto_decompress=self._auto_decompress, + lax=self.lax, + headers_parser=self._headers_parser, + ) + if not payload_parser.done: + self._payload_parser = payload_parser + else: + payload = EMPTY_PAYLOAD + + messages.append((msg, payload)) + should_close = msg.should_close + else: + self._tail = data[start_pos:] + data = EMPTY + break + + # no parser, just store + elif self._payload_parser is None and self._upgraded: + assert not self._lines + break + + # feed payload + elif data and start_pos < data_len: + assert not self._lines + assert self._payload_parser is not None + try: + eof, data = self._payload_parser.feed_data(data[start_pos:], SEP) + except BaseException as underlying_exc: + reraised_exc = underlying_exc + if self.payload_exception is not None: + reraised_exc = self.payload_exception(str(underlying_exc)) + + set_exception( + self._payload_parser.payload, + reraised_exc, + underlying_exc, + ) + + eof = True + data = b"" + if isinstance( + underlying_exc, (InvalidHeader, TransferEncodingError) + ): + raise + + if eof: + start_pos = 0 + data_len = len(data) + self._payload_parser = None + continue + else: + break + + if data and start_pos < data_len: + data = data[start_pos:] + else: + data = EMPTY + + return messages, self._upgraded, data + + def parse_headers( + self, lines: List[bytes] + ) -> Tuple[ + "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool + ]: + """Parses RFC 5322 headers from a stream. + + Line continuations are supported. Returns list of header name + and value pairs. Header name is in upper case. + """ + headers, raw_headers = self._headers_parser.parse_headers(lines) + close_conn = None + encoding = None + upgrade = False + chunked = False + + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 + # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf + singletons = ( + hdrs.CONTENT_LENGTH, + hdrs.CONTENT_LOCATION, + hdrs.CONTENT_RANGE, + hdrs.CONTENT_TYPE, + hdrs.ETAG, + hdrs.HOST, + hdrs.MAX_FORWARDS, + hdrs.SERVER, + hdrs.TRANSFER_ENCODING, + hdrs.USER_AGENT, + ) + bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None) + if bad_hdr is not None: + raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") + + # keep-alive + conn = headers.get(hdrs.CONNECTION) + if conn: + v = conn.lower() + if v == "close": + close_conn = True + elif v == "keep-alive": + close_conn = False + # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols + elif v == "upgrade" and headers.get(hdrs.UPGRADE): + upgrade = True + + # encoding + enc = headers.get(hdrs.CONTENT_ENCODING, "") + if enc.isascii() and enc.lower() in {"gzip", "deflate", "br", "zstd"}: + encoding = enc + + # chunking + te = headers.get(hdrs.TRANSFER_ENCODING) + if te is not None: + if self._is_chunked_te(te): + chunked = True + + if hdrs.CONTENT_LENGTH in headers: + raise BadHttpMessage( + "Transfer-Encoding can't be present with Content-Length", + ) + + return (headers, raw_headers, close_conn, encoding, upgrade, chunked) + + def set_upgraded(self, val: bool) -> None: + """Set connection upgraded (to websocket) mode. + + :param bool val: new state. + """ + self._upgraded = val + + +class HttpRequestParser(HttpParser[RawRequestMessage]): + """Read request status line. + + Exception .http_exceptions.BadStatusLine + could be raised in case of any errors in status line. + Returns RawRequestMessage. + """ + + def parse_message(self, lines: List[bytes]) -> RawRequestMessage: + # request line + line = lines[0].decode("utf-8", "surrogateescape") + try: + method, path, version = line.split(" ", maxsplit=2) + except ValueError: + raise BadHttpMethod(line) from None + + if len(path) > self.max_line_size: + raise LineTooLong( + "Status line is too long", str(self.max_line_size), str(len(path)) + ) + + # method + if not TOKENRE.fullmatch(method): + raise BadHttpMethod(method) + + # version + match = VERSRE.fullmatch(version) + if match is None: + raise BadStatusLine(line) + version_o = HttpVersion(int(match.group(1)), int(match.group(2))) + + if method == "CONNECT": + # authority-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3 + url = URL.build(authority=path, encoded=True) + elif path.startswith("/"): + # origin-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1 + path_part, _hash_separator, url_fragment = path.partition("#") + path_part, _question_mark_separator, qs_part = path_part.partition("?") + + # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based + # NOTE: parser does, otherwise it results into the same + # NOTE: HTTP Request-Line input producing different + # NOTE: `yarl.URL()` objects + url = URL.build( + path=path_part, + query_string=qs_part, + fragment=url_fragment, + encoded=True, + ) + elif path == "*" and method == "OPTIONS": + # asterisk-form, + url = URL(path, encoded=True) + else: + # absolute-form for proxy maybe, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2 + url = URL(path, encoded=True) + if url.scheme == "": + # not absolute-form + raise InvalidURLError( + path.encode(errors="surrogateescape").decode("latin1") + ) + + # read headers + ( + headers, + raw_headers, + close, + compression, + upgrade, + chunked, + ) = self.parse_headers(lines[1:]) + + if close is None: # then the headers weren't set in the request + if version_o <= HttpVersion10: # HTTP 1.0 must asks to not close + close = True + else: # HTTP 1.1 must ask to close. + close = False + + return RawRequestMessage( + method, + path, + version_o, + headers, + raw_headers, + close, + compression, + upgrade, + chunked, + url, + ) + + def _is_chunked_te(self, te: str) -> bool: + te = te.rsplit(",", maxsplit=1)[-1].strip(" \t") + # .lower() transforms some non-ascii chars, so must check first. + if te.isascii() and te.lower() == "chunked": + return True + # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3 + raise BadHttpMessage("Request has invalid `Transfer-Encoding`") + + +class HttpResponseParser(HttpParser[RawResponseMessage]): + """Read response status line and headers. + + BadStatusLine could be raised in case of any errors in status line. + Returns RawResponseMessage. + """ + + # Lax mode should only be enabled on response parser. + lax = not DEBUG + + def feed_data( + self, + data: bytes, + SEP: Optional[_SEP] = None, + *args: Any, + **kwargs: Any, + ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]: + if SEP is None: + SEP = b"\r\n" if DEBUG else b"\n" + return super().feed_data(data, SEP, *args, **kwargs) + + def parse_message(self, lines: List[bytes]) -> RawResponseMessage: + line = lines[0].decode("utf-8", "surrogateescape") + try: + version, status = line.split(maxsplit=1) + except ValueError: + raise BadStatusLine(line) from None + + try: + status, reason = status.split(maxsplit=1) + except ValueError: + status = status.strip() + reason = "" + + if len(reason) > self.max_line_size: + raise LineTooLong( + "Status line is too long", str(self.max_line_size), str(len(reason)) + ) + + # version + match = VERSRE.fullmatch(version) + if match is None: + raise BadStatusLine(line) + version_o = HttpVersion(int(match.group(1)), int(match.group(2))) + + # The status code is a three-digit ASCII number, no padding + if len(status) != 3 or not DIGITS.fullmatch(status): + raise BadStatusLine(line) + status_i = int(status) + + # read headers + ( + headers, + raw_headers, + close, + compression, + upgrade, + chunked, + ) = self.parse_headers(lines[1:]) + + if close is None: + if version_o <= HttpVersion10: + close = True + # https://www.rfc-editor.org/rfc/rfc9112.html#name-message-body-length + elif 100 <= status_i < 200 or status_i in {204, 304}: + close = False + elif hdrs.CONTENT_LENGTH in headers or hdrs.TRANSFER_ENCODING in headers: + close = False + else: + # https://www.rfc-editor.org/rfc/rfc9112.html#section-6.3-2.8 + close = True + + return RawResponseMessage( + version_o, + status_i, + reason.strip(), + headers, + raw_headers, + close, + compression, + upgrade, + chunked, + ) + + def _is_chunked_te(self, te: str) -> bool: + # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2 + return te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked" + + +class HttpPayloadParser: + def __init__( + self, + payload: StreamReader, + length: Optional[int] = None, + chunked: bool = False, + compression: Optional[str] = None, + code: Optional[int] = None, + method: Optional[str] = None, + response_with_body: bool = True, + auto_decompress: bool = True, + lax: bool = False, + *, + headers_parser: HeadersParser, + ) -> None: + self._length = 0 + self._type = ParseState.PARSE_UNTIL_EOF + self._chunk = ChunkState.PARSE_CHUNKED_SIZE + self._chunk_size = 0 + self._chunk_tail = b"" + self._auto_decompress = auto_decompress + self._lax = lax + self._headers_parser = headers_parser + self._trailer_lines: list[bytes] = [] + self.done = False + + # payload decompression wrapper + if response_with_body and compression and self._auto_decompress: + real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer( + payload, compression + ) + else: + real_payload = payload + + # payload parser + if not response_with_body: + # don't parse payload if it's not expected to be received + self._type = ParseState.PARSE_NONE + real_payload.feed_eof() + self.done = True + elif chunked: + self._type = ParseState.PARSE_CHUNKED + elif length is not None: + self._type = ParseState.PARSE_LENGTH + self._length = length + if self._length == 0: + real_payload.feed_eof() + self.done = True + + self.payload = real_payload + + def feed_eof(self) -> None: + if self._type == ParseState.PARSE_UNTIL_EOF: + self.payload.feed_eof() + elif self._type == ParseState.PARSE_LENGTH: + raise ContentLengthError( + "Not enough data to satisfy content length header." + ) + elif self._type == ParseState.PARSE_CHUNKED: + raise TransferEncodingError( + "Not enough data to satisfy transfer length header." + ) + + def feed_data( + self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";" + ) -> Tuple[bool, bytes]: + # Read specified amount of bytes + if self._type == ParseState.PARSE_LENGTH: + required = self._length + chunk_len = len(chunk) + + if required >= chunk_len: + self._length = required - chunk_len + self.payload.feed_data(chunk, chunk_len) + if self._length == 0: + self.payload.feed_eof() + return True, b"" + else: + self._length = 0 + self.payload.feed_data(chunk[:required], required) + self.payload.feed_eof() + return True, chunk[required:] + + # Chunked transfer encoding parser + elif self._type == ParseState.PARSE_CHUNKED: + if self._chunk_tail: + chunk = self._chunk_tail + chunk + self._chunk_tail = b"" + + while chunk: + + # read next chunk size + if self._chunk == ChunkState.PARSE_CHUNKED_SIZE: + pos = chunk.find(SEP) + if pos >= 0: + i = chunk.find(CHUNK_EXT, 0, pos) + if i >= 0: + size_b = chunk[:i] # strip chunk-extensions + # Verify no LF in the chunk-extension + if b"\n" in (ext := chunk[i:pos]): + exc = TransferEncodingError( + f"Unexpected LF in chunk-extension: {ext!r}" + ) + set_exception(self.payload, exc) + raise exc + else: + size_b = chunk[:pos] + + if self._lax: # Allow whitespace in lax mode. + size_b = size_b.strip() + + if not re.fullmatch(HEXDIGITS, size_b): + exc = TransferEncodingError( + chunk[:pos].decode("ascii", "surrogateescape") + ) + set_exception(self.payload, exc) + raise exc + size = int(bytes(size_b), 16) + + chunk = chunk[pos + len(SEP) :] + if size == 0: # eof marker + self._chunk = ChunkState.PARSE_TRAILERS + if self._lax and chunk.startswith(b"\r"): + chunk = chunk[1:] + else: + self._chunk = ChunkState.PARSE_CHUNKED_CHUNK + self._chunk_size = size + self.payload.begin_http_chunk_receiving() + else: + self._chunk_tail = chunk + return False, b"" + + # read chunk and feed buffer + if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK: + required = self._chunk_size + chunk_len = len(chunk) + + if required > chunk_len: + self._chunk_size = required - chunk_len + self.payload.feed_data(chunk, chunk_len) + return False, b"" + else: + self._chunk_size = 0 + self.payload.feed_data(chunk[:required], required) + chunk = chunk[required:] + self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF + self.payload.end_http_chunk_receiving() + + # toss the CRLF at the end of the chunk + if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF: + if self._lax and chunk.startswith(b"\r"): + chunk = chunk[1:] + if chunk[: len(SEP)] == SEP: + chunk = chunk[len(SEP) :] + self._chunk = ChunkState.PARSE_CHUNKED_SIZE + else: + self._chunk_tail = chunk + return False, b"" + + if self._chunk == ChunkState.PARSE_TRAILERS: + pos = chunk.find(SEP) + if pos < 0: # No line found + self._chunk_tail = chunk + return False, b"" + + line = chunk[:pos] + chunk = chunk[pos + len(SEP) :] + if SEP == b"\n": # For lax response parsing + line = line.rstrip(b"\r") + self._trailer_lines.append(line) + + # \r\n\r\n found, end of stream + if self._trailer_lines[-1] == b"": + # Headers and trailers are defined the same way, + # so we reuse the HeadersParser here. + try: + trailers, raw_trailers = self._headers_parser.parse_headers( + self._trailer_lines + ) + finally: + self._trailer_lines.clear() + self.payload.feed_eof() + return True, chunk + + # Read all bytes until eof + elif self._type == ParseState.PARSE_UNTIL_EOF: + self.payload.feed_data(chunk, len(chunk)) + + return False, b"" + + +class DeflateBuffer: + """DeflateStream decompress stream and feed data into specified stream.""" + + decompressor: Any + + def __init__( + self, + out: StreamReader, + encoding: Optional[str], + max_decompress_size: int = DEFAULT_MAX_DECOMPRESS_SIZE, + ) -> None: + self.out = out + self.size = 0 + out.total_compressed_bytes = self.size + self.encoding = encoding + self._started_decoding = False + + self.decompressor: Union[BrotliDecompressor, ZLibDecompressor, ZSTDDecompressor] + if encoding == "br": + if not HAS_BROTLI: # pragma: no cover + raise ContentEncodingError( + "Can not decode content-encoding: brotli (br). " + "Please install `Brotli`" + ) + self.decompressor = BrotliDecompressor() + elif encoding == "zstd": + if not HAS_ZSTD: + raise ContentEncodingError( + "Can not decode content-encoding: zstandard (zstd). " + "Please install `backports.zstd`" + ) + self.decompressor = ZSTDDecompressor() + else: + self.decompressor = ZLibDecompressor(encoding=encoding) + + self._max_decompress_size = max_decompress_size + + def set_exception( + self, + exc: BaseException, + exc_cause: BaseException = _EXC_SENTINEL, + ) -> None: + set_exception(self.out, exc, exc_cause) + + def feed_data(self, chunk: bytes, size: int) -> None: + if not size: + return + + self.size += size + self.out.total_compressed_bytes = self.size + + # RFC1950 + # bits 0..3 = CM = 0b1000 = 8 = "deflate" + # bits 4..7 = CINFO = 1..7 = windows size. + if ( + not self._started_decoding + and self.encoding == "deflate" + and chunk[0] & 0xF != 8 + ): + # Change the decoder to decompress incorrectly compressed data + # Actually we should issue a warning about non-RFC-compliant data. + self.decompressor = ZLibDecompressor( + encoding=self.encoding, suppress_deflate_header=True + ) + + try: + # Decompress with limit + 1 so we can detect if output exceeds limit + chunk = self.decompressor.decompress_sync( + chunk, max_length=self._max_decompress_size + 1 + ) + except Exception: + raise ContentEncodingError( + "Can not decode content-encoding: %s" % self.encoding + ) + + self._started_decoding = True + + # Check if decompression limit was exceeded + if len(chunk) > self._max_decompress_size: + raise DecompressSizeError( + "Decompressed data exceeds the configured limit of %d bytes" + % self._max_decompress_size + ) + + if chunk: + self.out.feed_data(chunk, len(chunk)) + + def feed_eof(self) -> None: + chunk = self.decompressor.flush() + + if chunk or self.size > 0: + self.out.feed_data(chunk, len(chunk)) + if self.encoding == "deflate" and not self.decompressor.eof: + raise ContentEncodingError("deflate") + + self.out.feed_eof() + + def begin_http_chunk_receiving(self) -> None: + self.out.begin_http_chunk_receiving() + + def end_http_chunk_receiving(self) -> None: + self.out.end_http_chunk_receiving() + + +HttpRequestParserPy = HttpRequestParser +HttpResponseParserPy = HttpResponseParser +RawRequestMessagePy = RawRequestMessage +RawResponseMessagePy = RawResponseMessage + +try: + if not NO_EXTENSIONS: + from ._http_parser import ( # type: ignore[import-not-found,no-redef] + HttpRequestParser, + HttpResponseParser, + RawRequestMessage, + RawResponseMessage, + ) + + HttpRequestParserC = HttpRequestParser + HttpResponseParserC = HttpResponseParser + RawRequestMessageC = RawRequestMessage + RawResponseMessageC = RawResponseMessage +except ImportError: # pragma: no cover + pass diff --git a/aiohttp/http_writer.py b/aiohttp/http_writer.py new file mode 100644 index 0000000000000000000000000000000000000000..a140b218b25fedccb49451c547d4d01326367cfb --- /dev/null +++ b/aiohttp/http_writer.py @@ -0,0 +1,378 @@ +"""Http related parsers and protocol.""" + +import asyncio +import sys +from typing import ( # noqa + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Iterable, + List, + NamedTuple, + Optional, + Union, +) + +from multidict import CIMultiDict + +from .abc import AbstractStreamWriter +from .base_protocol import BaseProtocol +from .client_exceptions import ClientConnectionResetError +from .compression_utils import ZLibCompressor +from .helpers import NO_EXTENSIONS + +__all__ = ("StreamWriter", "HttpVersion", "HttpVersion10", "HttpVersion11") + + +MIN_PAYLOAD_FOR_WRITELINES = 2048 +IS_PY313_BEFORE_313_2 = (3, 13, 0) <= sys.version_info < (3, 13, 2) +IS_PY_BEFORE_312_9 = sys.version_info < (3, 12, 9) +SKIP_WRITELINES = IS_PY313_BEFORE_313_2 or IS_PY_BEFORE_312_9 +# writelines is not safe for use +# on Python 3.12+ until 3.12.9 +# on Python 3.13+ until 3.13.2 +# and on older versions it not any faster than write +# CVE-2024-12254: https://github.com/python/cpython/pull/127656 + + +class HttpVersion(NamedTuple): + major: int + minor: int + + +HttpVersion10 = HttpVersion(1, 0) +HttpVersion11 = HttpVersion(1, 1) + + +_T_OnChunkSent = Optional[Callable[[bytes], Awaitable[None]]] +_T_OnHeadersSent = Optional[Callable[["CIMultiDict[str]"], Awaitable[None]]] + + +class StreamWriter(AbstractStreamWriter): + + length: Optional[int] = None + chunked: bool = False + _eof: bool = False + _compress: Optional[ZLibCompressor] = None + + def __init__( + self, + protocol: BaseProtocol, + loop: asyncio.AbstractEventLoop, + on_chunk_sent: _T_OnChunkSent = None, + on_headers_sent: _T_OnHeadersSent = None, + ) -> None: + self._protocol = protocol + self.loop = loop + self._on_chunk_sent: _T_OnChunkSent = on_chunk_sent + self._on_headers_sent: _T_OnHeadersSent = on_headers_sent + self._headers_buf: Optional[bytes] = None + self._headers_written: bool = False + + @property + def transport(self) -> Optional[asyncio.Transport]: + return self._protocol.transport + + @property + def protocol(self) -> BaseProtocol: + return self._protocol + + def enable_chunking(self) -> None: + self.chunked = True + + def enable_compression( + self, encoding: str = "deflate", strategy: Optional[int] = None + ) -> None: + self._compress = ZLibCompressor(encoding=encoding, strategy=strategy) + + def _write(self, chunk: Union[bytes, bytearray, memoryview]) -> None: + size = len(chunk) + self.buffer_size += size + self.output_size += size + transport = self._protocol.transport + if transport is None or transport.is_closing(): + raise ClientConnectionResetError("Cannot write to closing transport") + transport.write(chunk) + + def _writelines(self, chunks: Iterable[bytes]) -> None: + size = 0 + for chunk in chunks: + size += len(chunk) + self.buffer_size += size + self.output_size += size + transport = self._protocol.transport + if transport is None or transport.is_closing(): + raise ClientConnectionResetError("Cannot write to closing transport") + if SKIP_WRITELINES or size < MIN_PAYLOAD_FOR_WRITELINES: + transport.write(b"".join(chunks)) + else: + transport.writelines(chunks) + + def _write_chunked_payload( + self, chunk: Union[bytes, bytearray, "memoryview[int]", "memoryview[bytes]"] + ) -> None: + """Write a chunk with proper chunked encoding.""" + chunk_len_pre = f"{len(chunk):x}\r\n".encode("ascii") + self._writelines((chunk_len_pre, chunk, b"\r\n")) + + def _send_headers_with_payload( + self, + chunk: Union[bytes, bytearray, "memoryview[int]", "memoryview[bytes]"], + is_eof: bool, + ) -> None: + """Send buffered headers with payload, coalescing into single write.""" + # Mark headers as written + self._headers_written = True + headers_buf = self._headers_buf + self._headers_buf = None + + if TYPE_CHECKING: + # Safe because callers (write() and write_eof()) only invoke this method + # after checking that self._headers_buf is truthy + assert headers_buf is not None + + if not self.chunked: + # Non-chunked: coalesce headers with body + if chunk: + self._writelines((headers_buf, chunk)) + else: + self._write(headers_buf) + return + + # Coalesce headers with chunked data + if chunk: + chunk_len_pre = f"{len(chunk):x}\r\n".encode("ascii") + if is_eof: + self._writelines((headers_buf, chunk_len_pre, chunk, b"\r\n0\r\n\r\n")) + else: + self._writelines((headers_buf, chunk_len_pre, chunk, b"\r\n")) + elif is_eof: + self._writelines((headers_buf, b"0\r\n\r\n")) + else: + self._write(headers_buf) + + async def write( + self, + chunk: Union[bytes, bytearray, memoryview], + *, + drain: bool = True, + LIMIT: int = 0x10000, + ) -> None: + """ + Writes chunk of data to a stream. + + write_eof() indicates end of stream. + writer can't be used after write_eof() method being called. + write() return drain future. + """ + if self._on_chunk_sent is not None: + await self._on_chunk_sent(chunk) + + if isinstance(chunk, memoryview): + if chunk.nbytes != len(chunk): + # just reshape it + chunk = chunk.cast("c") + + if self._compress is not None: + chunk = await self._compress.compress(chunk) + if not chunk: + return + + if self.length is not None: + chunk_len = len(chunk) + if self.length >= chunk_len: + self.length = self.length - chunk_len + else: + chunk = chunk[: self.length] + self.length = 0 + if not chunk: + return + + # Handle buffered headers for small payload optimization + if self._headers_buf and not self._headers_written: + self._send_headers_with_payload(chunk, False) + if drain and self.buffer_size > LIMIT: + self.buffer_size = 0 + await self.drain() + return + + if chunk: + if self.chunked: + self._write_chunked_payload(chunk) + else: + self._write(chunk) + + if drain and self.buffer_size > LIMIT: + self.buffer_size = 0 + await self.drain() + + async def write_headers( + self, status_line: str, headers: "CIMultiDict[str]" + ) -> None: + """Write headers to the stream.""" + if self._on_headers_sent is not None: + await self._on_headers_sent(headers) + # status + headers + buf = _serialize_headers(status_line, headers) + self._headers_written = False + self._headers_buf = buf + + def send_headers(self) -> None: + """Force sending buffered headers if not already sent.""" + if not self._headers_buf or self._headers_written: + return + + self._headers_written = True + headers_buf = self._headers_buf + self._headers_buf = None + + if TYPE_CHECKING: + # Safe because we only enter this block when self._headers_buf is truthy + assert headers_buf is not None + + self._write(headers_buf) + + def set_eof(self) -> None: + """Indicate that the message is complete.""" + if self._eof: + return + + # If headers haven't been sent yet, send them now + # This handles the case where there's no body at all + if self._headers_buf and not self._headers_written: + self._headers_written = True + headers_buf = self._headers_buf + self._headers_buf = None + + if TYPE_CHECKING: + # Safe because we only enter this block when self._headers_buf is truthy + assert headers_buf is not None + + # Combine headers and chunked EOF marker in a single write + if self.chunked: + self._writelines((headers_buf, b"0\r\n\r\n")) + else: + self._write(headers_buf) + elif self.chunked and self._headers_written: + # Headers already sent, just send the final chunk marker + self._write(b"0\r\n\r\n") + + self._eof = True + + async def write_eof(self, chunk: bytes = b"") -> None: + if self._eof: + return + + if chunk and self._on_chunk_sent is not None: + await self._on_chunk_sent(chunk) + + # Handle body/compression + if self._compress: + chunks: List[bytes] = [] + chunks_len = 0 + if chunk and (compressed_chunk := await self._compress.compress(chunk)): + chunks_len = len(compressed_chunk) + chunks.append(compressed_chunk) + + flush_chunk = self._compress.flush() + chunks_len += len(flush_chunk) + chunks.append(flush_chunk) + assert chunks_len + + # Send buffered headers with compressed data if not yet sent + if self._headers_buf and not self._headers_written: + self._headers_written = True + headers_buf = self._headers_buf + self._headers_buf = None + + if self.chunked: + # Coalesce headers with compressed chunked data + chunk_len_pre = f"{chunks_len:x}\r\n".encode("ascii") + self._writelines( + (headers_buf, chunk_len_pre, *chunks, b"\r\n0\r\n\r\n") + ) + else: + # Coalesce headers with compressed data + self._writelines((headers_buf, *chunks)) + await self.drain() + self._eof = True + return + + # Headers already sent, just write compressed data + if self.chunked: + chunk_len_pre = f"{chunks_len:x}\r\n".encode("ascii") + self._writelines((chunk_len_pre, *chunks, b"\r\n0\r\n\r\n")) + elif len(chunks) > 1: + self._writelines(chunks) + else: + self._write(chunks[0]) + await self.drain() + self._eof = True + return + + # No compression - send buffered headers if not yet sent + if self._headers_buf and not self._headers_written: + # Use helper to send headers with payload + self._send_headers_with_payload(chunk, True) + await self.drain() + self._eof = True + return + + # Handle remaining body + if self.chunked: + if chunk: + # Write final chunk with EOF marker + self._writelines( + (f"{len(chunk):x}\r\n".encode("ascii"), chunk, b"\r\n0\r\n\r\n") + ) + else: + self._write(b"0\r\n\r\n") + await self.drain() + self._eof = True + return + + if chunk: + self._write(chunk) + await self.drain() + + self._eof = True + + async def drain(self) -> None: + """Flush the write buffer. + + The intended use is to write + + await w.write(data) + await w.drain() + """ + protocol = self._protocol + if protocol.transport is not None and protocol._paused: + await protocol._drain_helper() + + +def _safe_header(string: str) -> str: + if "\r" in string or "\n" in string: + raise ValueError( + "Newline or carriage return detected in headers. " + "Potential header injection attack." + ) + return string + + +def _py_serialize_headers(status_line: str, headers: "CIMultiDict[str]") -> bytes: + headers_gen = (_safe_header(k) + ": " + _safe_header(v) for k, v in headers.items()) + line = status_line + "\r\n" + "\r\n".join(headers_gen) + "\r\n\r\n" + return line.encode("utf-8") + + +_serialize_headers = _py_serialize_headers + +try: + import aiohttp._http_writer as _http_writer # type: ignore[import-not-found] + + _c_serialize_headers = _http_writer._serialize_headers + if not NO_EXTENSIONS: + _serialize_headers = _c_serialize_headers +except ImportError: + pass diff --git a/aiohttp/payload_streamer.py b/aiohttp/payload_streamer.py new file mode 100644 index 0000000000000000000000000000000000000000..831fdc0a77f302acaf9a000be408fe7c9a9035aa --- /dev/null +++ b/aiohttp/payload_streamer.py @@ -0,0 +1,78 @@ +""" +Payload implementation for coroutines as data provider. + +As a simple case, you can upload data from file:: + + @aiohttp.streamer + async def file_sender(writer, file_name=None): + with open(file_name, 'rb') as f: + chunk = f.read(2**16) + while chunk: + await writer.write(chunk) + + chunk = f.read(2**16) + +Then you can use `file_sender` like this: + + async with session.post('http://httpbin.org/post', + data=file_sender(file_name='huge_file')) as resp: + print(await resp.text()) + +..note:: Coroutine must accept `writer` as first argument + +""" + +import types +import warnings +from typing import Any, Awaitable, Callable, Dict, Tuple + +from .abc import AbstractStreamWriter +from .payload import Payload, payload_type + +__all__ = ("streamer",) + + +class _stream_wrapper: + def __init__( + self, + coro: Callable[..., Awaitable[None]], + args: Tuple[Any, ...], + kwargs: Dict[str, Any], + ) -> None: + self.coro = types.coroutine(coro) + self.args = args + self.kwargs = kwargs + + async def __call__(self, writer: AbstractStreamWriter) -> None: + await self.coro(writer, *self.args, **self.kwargs) + + +class streamer: + def __init__(self, coro: Callable[..., Awaitable[None]]) -> None: + warnings.warn( + "@streamer is deprecated, use async generators instead", + DeprecationWarning, + stacklevel=2, + ) + self.coro = coro + + def __call__(self, *args: Any, **kwargs: Any) -> _stream_wrapper: + return _stream_wrapper(self.coro, args, kwargs) + + +@payload_type(_stream_wrapper) +class StreamWrapperPayload(Payload): + async def write(self, writer: AbstractStreamWriter) -> None: + await self._value(writer) + + def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str: + raise TypeError("Unable to decode.") + + +@payload_type(streamer) +class StreamPayload(StreamWrapperPayload): + def __init__(self, value: Any, *args: Any, **kwargs: Any) -> None: + super().__init__(value(), *args, **kwargs) + + async def write(self, writer: AbstractStreamWriter) -> None: + await self._value(writer) diff --git a/aiohttp/py.typed b/aiohttp/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..f5642f79f21d872f010979dcf6f0c4a415acc19d --- /dev/null +++ b/aiohttp/py.typed @@ -0,0 +1 @@ +Marker diff --git a/aiohttp/pytest_plugin.py b/aiohttp/pytest_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..7d59fe820d697632c9a3311ff96841ebc0ee735b --- /dev/null +++ b/aiohttp/pytest_plugin.py @@ -0,0 +1,444 @@ +import asyncio +import contextlib +import inspect +import warnings +from typing import ( + Any, + Awaitable, + Callable, + Dict, + Iterator, + Optional, + Protocol, + Union, + overload, +) + +import pytest + +from .test_utils import ( + BaseTestServer, + RawTestServer, + TestClient, + TestServer, + loop_context, + setup_test_loop, + teardown_test_loop, + unused_port as _unused_port, +) +from .web import Application, BaseRequest, Request +from .web_protocol import _RequestHandler + +try: + import uvloop +except ImportError: # pragma: no cover + uvloop = None # type: ignore[assignment] + + +class AiohttpClient(Protocol): + @overload + async def __call__( + self, + __param: Application, + *, + server_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> TestClient[Request, Application]: ... + @overload + async def __call__( + self, + __param: BaseTestServer, + *, + server_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> TestClient[BaseRequest, None]: ... + + +class AiohttpServer(Protocol): + def __call__( + self, app: Application, *, port: Optional[int] = None, **kwargs: Any + ) -> Awaitable[TestServer]: ... + + +class AiohttpRawServer(Protocol): + def __call__( + self, handler: _RequestHandler, *, port: Optional[int] = None, **kwargs: Any + ) -> Awaitable[RawTestServer]: ... + + +def pytest_addoption(parser): # type: ignore[no-untyped-def] + parser.addoption( + "--aiohttp-fast", + action="store_true", + default=False, + help="run tests faster by disabling extra checks", + ) + parser.addoption( + "--aiohttp-loop", + action="store", + default="pyloop", + help="run tests with specific loop: pyloop, uvloop or all", + ) + parser.addoption( + "--aiohttp-enable-loop-debug", + action="store_true", + default=False, + help="enable event loop debug mode", + ) + + +def pytest_fixture_setup(fixturedef): # type: ignore[no-untyped-def] + """Set up pytest fixture. + + Allow fixtures to be coroutines. Run coroutine fixtures in an event loop. + """ + func = fixturedef.func + + if inspect.isasyncgenfunction(func): + # async generator fixture + is_async_gen = True + elif inspect.iscoroutinefunction(func): + # regular async fixture + is_async_gen = False + else: + # not an async fixture, nothing to do + return + + strip_request = False + if "request" not in fixturedef.argnames: + fixturedef.argnames += ("request",) + strip_request = True + + def wrapper(*args, **kwargs): # type: ignore[no-untyped-def] + request = kwargs["request"] + if strip_request: + del kwargs["request"] + + # if neither the fixture nor the test use the 'loop' fixture, + # 'getfixturevalue' will fail because the test is not parameterized + # (this can be removed someday if 'loop' is no longer parameterized) + if "loop" not in request.fixturenames: + raise Exception( + "Asynchronous fixtures must depend on the 'loop' fixture or " + "be used in tests depending from it." + ) + + _loop = request.getfixturevalue("loop") + + if is_async_gen: + # for async generators, we need to advance the generator once, + # then advance it again in a finalizer + gen = func(*args, **kwargs) + + def finalizer(): # type: ignore[no-untyped-def] + try: + return _loop.run_until_complete(gen.__anext__()) + except StopAsyncIteration: + pass + + request.addfinalizer(finalizer) + return _loop.run_until_complete(gen.__anext__()) + else: + return _loop.run_until_complete(func(*args, **kwargs)) + + fixturedef.func = wrapper + + +@pytest.fixture +def fast(request): # type: ignore[no-untyped-def] + """--fast config option""" + return request.config.getoption("--aiohttp-fast") + + +@pytest.fixture +def loop_debug(request): # type: ignore[no-untyped-def] + """--enable-loop-debug config option""" + return request.config.getoption("--aiohttp-enable-loop-debug") + + +@contextlib.contextmanager +def _runtime_warning_context(): # type: ignore[no-untyped-def] + """Context manager which checks for RuntimeWarnings. + + This exists specifically to + avoid "coroutine 'X' was never awaited" warnings being missed. + + If RuntimeWarnings occur in the context a RuntimeError is raised. + """ + with warnings.catch_warnings(record=True) as _warnings: + yield + rw = [ + "{w.filename}:{w.lineno}:{w.message}".format(w=w) + for w in _warnings + if w.category == RuntimeWarning + ] + if rw: + raise RuntimeError( + "{} Runtime Warning{},\n{}".format( + len(rw), "" if len(rw) == 1 else "s", "\n".join(rw) + ) + ) + + +@contextlib.contextmanager +def _passthrough_loop_context(loop, fast=False): # type: ignore[no-untyped-def] + """Passthrough loop context. + + Sets up and tears down a loop unless one is passed in via the loop + argument when it's passed straight through. + """ + if loop: + # loop already exists, pass it straight through + yield loop + else: + # this shadows loop_context's standard behavior + loop = setup_test_loop() + yield loop + teardown_test_loop(loop, fast=fast) + + +def pytest_pycollect_makeitem(collector, name, obj): # type: ignore[no-untyped-def] + """Fix pytest collecting for coroutines.""" + if collector.funcnamefilter(name) and inspect.iscoroutinefunction(obj): + return list(collector._genfunctions(name, obj)) + + +def pytest_pyfunc_call(pyfuncitem): # type: ignore[no-untyped-def] + """Run coroutines in an event loop instead of a normal function call.""" + fast = pyfuncitem.config.getoption("--aiohttp-fast") + if inspect.iscoroutinefunction(pyfuncitem.function): + existing_loop = ( + pyfuncitem.funcargs.get("proactor_loop") + or pyfuncitem.funcargs.get("selector_loop") + or pyfuncitem.funcargs.get("uvloop_loop") + or pyfuncitem.funcargs.get("loop", None) + ) + + with _runtime_warning_context(): + with _passthrough_loop_context(existing_loop, fast=fast) as _loop: + testargs = { + arg: pyfuncitem.funcargs[arg] + for arg in pyfuncitem._fixtureinfo.argnames + } + _loop.run_until_complete(pyfuncitem.obj(**testargs)) + + return True + + +def pytest_generate_tests(metafunc): # type: ignore[no-untyped-def] + if "loop_factory" not in metafunc.fixturenames: + return + + loops = metafunc.config.option.aiohttp_loop + avail_factories: dict[str, Callable[[], asyncio.AbstractEventLoop]] + avail_factories = {"pyloop": asyncio.new_event_loop} + + if uvloop is not None: # pragma: no cover + avail_factories["uvloop"] = uvloop.new_event_loop + + if loops == "all": + loops = "pyloop,uvloop?" + + factories = {} # type: ignore[var-annotated] + for name in loops.split(","): + required = not name.endswith("?") + name = name.strip(" ?") + if name not in avail_factories: # pragma: no cover + if required: + raise ValueError( + "Unknown loop '%s', available loops: %s" + % (name, list(factories.keys())) + ) + else: + continue + factories[name] = avail_factories[name] + metafunc.parametrize( + "loop_factory", list(factories.values()), ids=list(factories.keys()) + ) + + +@pytest.fixture +def loop( + loop_factory: Callable[[], asyncio.AbstractEventLoop], + fast: bool, + loop_debug: bool, +) -> Iterator[asyncio.AbstractEventLoop]: + """Return an instance of the event loop.""" + with loop_context(loop_factory, fast=fast) as _loop: + if loop_debug: + _loop.set_debug(True) # pragma: no cover + asyncio.set_event_loop(_loop) + yield _loop + + +@pytest.fixture +def proactor_loop() -> Iterator[asyncio.AbstractEventLoop]: + factory = asyncio.ProactorEventLoop # type: ignore[attr-defined] + + with loop_context(factory) as _loop: + asyncio.set_event_loop(_loop) + yield _loop + + +@pytest.fixture +def unused_port(aiohttp_unused_port: Callable[[], int]) -> Callable[[], int]: + warnings.warn( + "Deprecated, use aiohttp_unused_port fixture instead", + DeprecationWarning, + stacklevel=2, + ) + return aiohttp_unused_port + + +@pytest.fixture +def aiohttp_unused_port() -> Callable[[], int]: + """Return a port that is unused on the current host.""" + return _unused_port + + +@pytest.fixture +def aiohttp_server(loop: asyncio.AbstractEventLoop) -> Iterator[AiohttpServer]: + """Factory to create a TestServer instance, given an app. + + aiohttp_server(app, **kwargs) + """ + servers = [] + + async def go( + app: Application, + *, + host: str = "127.0.0.1", + port: Optional[int] = None, + **kwargs: Any, + ) -> TestServer: + server = TestServer(app, host=host, port=port) + await server.start_server(loop=loop, **kwargs) + servers.append(server) + return server + + yield go + + async def finalize() -> None: + while servers: + await servers.pop().close() + + loop.run_until_complete(finalize()) + + +@pytest.fixture +def test_server(aiohttp_server): # type: ignore[no-untyped-def] # pragma: no cover + warnings.warn( + "Deprecated, use aiohttp_server fixture instead", + DeprecationWarning, + stacklevel=2, + ) + return aiohttp_server + + +@pytest.fixture +def aiohttp_raw_server(loop: asyncio.AbstractEventLoop) -> Iterator[AiohttpRawServer]: + """Factory to create a RawTestServer instance, given a web handler. + + aiohttp_raw_server(handler, **kwargs) + """ + servers = [] + + async def go( + handler: _RequestHandler, *, port: Optional[int] = None, **kwargs: Any + ) -> RawTestServer: + server = RawTestServer(handler, port=port) + await server.start_server(loop=loop, **kwargs) + servers.append(server) + return server + + yield go + + async def finalize() -> None: + while servers: + await servers.pop().close() + + loop.run_until_complete(finalize()) + + +@pytest.fixture +def raw_test_server( # type: ignore[no-untyped-def] # pragma: no cover + aiohttp_raw_server, +): + warnings.warn( + "Deprecated, use aiohttp_raw_server fixture instead", + DeprecationWarning, + stacklevel=2, + ) + return aiohttp_raw_server + + +@pytest.fixture +def aiohttp_client(loop: asyncio.AbstractEventLoop) -> Iterator[AiohttpClient]: + """Factory to create a TestClient instance. + + aiohttp_client(app, **kwargs) + aiohttp_client(server, **kwargs) + aiohttp_client(raw_server, **kwargs) + """ + clients = [] + + @overload + async def go( + __param: Application, + *, + server_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> TestClient[Request, Application]: ... + + @overload + async def go( + __param: BaseTestServer, + *, + server_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> TestClient[BaseRequest, None]: ... + + async def go( + __param: Union[Application, BaseTestServer], + *args: Any, + server_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> TestClient[Any, Any]: + if isinstance(__param, Callable) and not isinstance( # type: ignore[arg-type] + __param, (Application, BaseTestServer) + ): + __param = __param(loop, *args, **kwargs) + kwargs = {} + else: + assert not args, "args should be empty" + + if isinstance(__param, Application): + server_kwargs = server_kwargs or {} + server = TestServer(__param, loop=loop, **server_kwargs) + client = TestClient(server, loop=loop, **kwargs) + elif isinstance(__param, BaseTestServer): + client = TestClient(__param, loop=loop, **kwargs) + else: + raise ValueError("Unknown argument type: %r" % type(__param)) + + await client.start_server() + clients.append(client) + return client + + yield go + + async def finalize() -> None: + while clients: + await clients.pop().close() + + loop.run_until_complete(finalize()) + + +@pytest.fixture +def test_client(aiohttp_client): # type: ignore[no-untyped-def] # pragma: no cover + warnings.warn( + "Deprecated, use aiohttp_client fixture instead", + DeprecationWarning, + stacklevel=2, + ) + return aiohttp_client diff --git a/aiohttp/resolver.py b/aiohttp/resolver.py new file mode 100644 index 0000000000000000000000000000000000000000..b20e5672ce51a1aadd7768301a3a1c8eb6007bf4 --- /dev/null +++ b/aiohttp/resolver.py @@ -0,0 +1,274 @@ +import asyncio +import socket +import weakref +from typing import Any, Dict, Final, List, Optional, Tuple, Type, Union + +from .abc import AbstractResolver, ResolveResult + +__all__ = ("ThreadedResolver", "AsyncResolver", "DefaultResolver") + + +try: + import aiodns + + aiodns_default = hasattr(aiodns.DNSResolver, "getaddrinfo") +except ImportError: # pragma: no cover + aiodns = None # type: ignore[assignment] + aiodns_default = False + + +_NUMERIC_SOCKET_FLAGS = socket.AI_NUMERICHOST | socket.AI_NUMERICSERV +_NAME_SOCKET_FLAGS = socket.NI_NUMERICHOST | socket.NI_NUMERICSERV +_AI_ADDRCONFIG = socket.AI_ADDRCONFIG +if hasattr(socket, "AI_MASK"): + _AI_ADDRCONFIG &= socket.AI_MASK + + +class ThreadedResolver(AbstractResolver): + """Threaded resolver. + + Uses an Executor for synchronous getaddrinfo() calls. + concurrent.futures.ThreadPoolExecutor is used by default. + """ + + def __init__(self, loop: Optional[asyncio.AbstractEventLoop] = None) -> None: + self._loop = loop or asyncio.get_running_loop() + + async def resolve( + self, host: str, port: int = 0, family: socket.AddressFamily = socket.AF_INET + ) -> List[ResolveResult]: + infos = await self._loop.getaddrinfo( + host, + port, + type=socket.SOCK_STREAM, + family=family, + flags=_AI_ADDRCONFIG, + ) + + hosts: List[ResolveResult] = [] + for family, _, proto, _, address in infos: + if family == socket.AF_INET6: + if len(address) < 3: + # IPv6 is not supported by Python build, + # or IPv6 is not enabled in the host + continue + if address[3]: + # This is essential for link-local IPv6 addresses. + # LL IPv6 is a VERY rare case. Strictly speaking, we should use + # getnameinfo() unconditionally, but performance makes sense. + resolved_host, _port = await self._loop.getnameinfo( + address, _NAME_SOCKET_FLAGS + ) + port = int(_port) + else: + resolved_host, port = address[:2] + else: # IPv4 + assert family == socket.AF_INET + resolved_host, port = address # type: ignore[misc] + hosts.append( + ResolveResult( + hostname=host, + host=resolved_host, + port=port, + family=family, + proto=proto, + flags=_NUMERIC_SOCKET_FLAGS, + ) + ) + + return hosts + + async def close(self) -> None: + pass + + +class AsyncResolver(AbstractResolver): + """Use the `aiodns` package to make asynchronous DNS lookups""" + + def __init__( + self, + loop: Optional[asyncio.AbstractEventLoop] = None, + *args: Any, + **kwargs: Any, + ) -> None: + if aiodns is None: + raise RuntimeError("Resolver requires aiodns library") + + self._loop = loop or asyncio.get_running_loop() + self._manager: Optional[_DNSResolverManager] = None + # If custom args are provided, create a dedicated resolver instance + # This means each AsyncResolver with custom args gets its own + # aiodns.DNSResolver instance + if args or kwargs: + self._resolver = aiodns.DNSResolver(*args, **kwargs) + return + # Use the shared resolver from the manager for default arguments + self._manager = _DNSResolverManager() + self._resolver = self._manager.get_resolver(self, self._loop) + + if not hasattr(self._resolver, "gethostbyname"): + # aiodns 1.1 is not available, fallback to DNSResolver.query + self.resolve = self._resolve_with_query # type: ignore + + async def resolve( + self, host: str, port: int = 0, family: socket.AddressFamily = socket.AF_INET + ) -> List[ResolveResult]: + try: + resp = await self._resolver.getaddrinfo( + host, + port=port, + type=socket.SOCK_STREAM, + family=family, + flags=_AI_ADDRCONFIG, + ) + except aiodns.error.DNSError as exc: + msg = exc.args[1] if len(exc.args) >= 1 else "DNS lookup failed" + raise OSError(None, msg) from exc + hosts: List[ResolveResult] = [] + for node in resp.nodes: + address: Union[Tuple[bytes, int], Tuple[bytes, int, int, int]] = node.addr + family = node.family + if family == socket.AF_INET6: + if len(address) > 3 and address[3]: + # This is essential for link-local IPv6 addresses. + # LL IPv6 is a VERY rare case. Strictly speaking, we should use + # getnameinfo() unconditionally, but performance makes sense. + result = await self._resolver.getnameinfo( + (address[0].decode("ascii"), *address[1:]), + _NAME_SOCKET_FLAGS, + ) + resolved_host = result.node + else: + resolved_host = address[0].decode("ascii") + port = address[1] + else: # IPv4 + assert family == socket.AF_INET + resolved_host = address[0].decode("ascii") + port = address[1] + hosts.append( + ResolveResult( + hostname=host, + host=resolved_host, + port=port, + family=family, + proto=0, + flags=_NUMERIC_SOCKET_FLAGS, + ) + ) + + if not hosts: + raise OSError(None, "DNS lookup failed") + + return hosts + + async def _resolve_with_query( + self, host: str, port: int = 0, family: int = socket.AF_INET + ) -> List[Dict[str, Any]]: + qtype: Final = "AAAA" if family == socket.AF_INET6 else "A" + + try: + resp = await self._resolver.query(host, qtype) + except aiodns.error.DNSError as exc: + msg = exc.args[1] if len(exc.args) >= 1 else "DNS lookup failed" + raise OSError(None, msg) from exc + + hosts = [] + for rr in resp: + hosts.append( + { + "hostname": host, + "host": rr.host, + "port": port, + "family": family, + "proto": 0, + "flags": socket.AI_NUMERICHOST, + } + ) + + if not hosts: + raise OSError(None, "DNS lookup failed") + + return hosts + + async def close(self) -> None: + if self._manager: + # Release the resolver from the manager if using the shared resolver + self._manager.release_resolver(self, self._loop) + self._manager = None # Clear reference to manager + self._resolver = None # type: ignore[assignment] # Clear reference to resolver + return + # Otherwise cancel our dedicated resolver + if self._resolver is not None: + self._resolver.cancel() + self._resolver = None # type: ignore[assignment] # Clear reference + + +class _DNSResolverManager: + """Manager for aiodns.DNSResolver objects. + + This class manages shared aiodns.DNSResolver instances + with no custom arguments across different event loops. + """ + + _instance: Optional["_DNSResolverManager"] = None + + def __new__(cls) -> "_DNSResolverManager": + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._init() + return cls._instance + + def _init(self) -> None: + # Use WeakKeyDictionary to allow event loops to be garbage collected + self._loop_data: weakref.WeakKeyDictionary[ + asyncio.AbstractEventLoop, + tuple["aiodns.DNSResolver", weakref.WeakSet["AsyncResolver"]], + ] = weakref.WeakKeyDictionary() + + def get_resolver( + self, client: "AsyncResolver", loop: asyncio.AbstractEventLoop + ) -> "aiodns.DNSResolver": + """Get or create the shared aiodns.DNSResolver instance for a specific event loop. + + Args: + client: The AsyncResolver instance requesting the resolver. + This is required to track resolver usage. + loop: The event loop to use for the resolver. + """ + # Create a new resolver and client set for this loop if it doesn't exist + if loop not in self._loop_data: + resolver = aiodns.DNSResolver(loop=loop) + client_set: weakref.WeakSet["AsyncResolver"] = weakref.WeakSet() + self._loop_data[loop] = (resolver, client_set) + else: + # Get the existing resolver and client set + resolver, client_set = self._loop_data[loop] + + # Register this client with the loop + client_set.add(client) + return resolver + + def release_resolver( + self, client: "AsyncResolver", loop: asyncio.AbstractEventLoop + ) -> None: + """Release the resolver for an AsyncResolver client when it's closed. + + Args: + client: The AsyncResolver instance to release. + loop: The event loop the resolver was using. + """ + # Remove client from its loop's tracking + current_loop_data = self._loop_data.get(loop) + if current_loop_data is None: + return + resolver, client_set = current_loop_data + client_set.discard(client) + # If no more clients for this loop, cancel and remove its resolver + if not client_set: + if resolver is not None: + resolver.cancel() + del self._loop_data[loop] + + +_DefaultType = Type[Union[AsyncResolver, ThreadedResolver]] +DefaultResolver: _DefaultType = AsyncResolver if aiodns_default else ThreadedResolver diff --git a/aiohttp/tcp_helpers.py b/aiohttp/tcp_helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..88b244223741ad2decb6cb612eae644fae88b2b2 --- /dev/null +++ b/aiohttp/tcp_helpers.py @@ -0,0 +1,37 @@ +"""Helper methods to tune a TCP connection""" + +import asyncio +import socket +from contextlib import suppress +from typing import Optional # noqa + +__all__ = ("tcp_keepalive", "tcp_nodelay") + + +if hasattr(socket, "SO_KEEPALIVE"): + + def tcp_keepalive(transport: asyncio.Transport) -> None: + sock = transport.get_extra_info("socket") + if sock is not None: + sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) + +else: + + def tcp_keepalive(transport: asyncio.Transport) -> None: # pragma: no cover + pass + + +def tcp_nodelay(transport: asyncio.Transport, value: bool) -> None: + sock = transport.get_extra_info("socket") + + if sock is None: + return + + if sock.family not in (socket.AF_INET, socket.AF_INET6): + return + + value = bool(value) + + # socket may be closed already, on windows OSError get raised + with suppress(OSError): + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, value) diff --git a/aiohttp/typedefs.py b/aiohttp/typedefs.py new file mode 100644 index 0000000000000000000000000000000000000000..cc8c0825b4e522f7d1b6cf0058564f322fb5a905 --- /dev/null +++ b/aiohttp/typedefs.py @@ -0,0 +1,69 @@ +import json +import os +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Iterable, + Mapping, + Protocol, + Tuple, + Union, +) + +from multidict import CIMultiDict, CIMultiDictProxy, MultiDict, MultiDictProxy, istr +from yarl import URL, Query as _Query + +Query = _Query + +DEFAULT_JSON_ENCODER = json.dumps +DEFAULT_JSON_DECODER = json.loads + +if TYPE_CHECKING: + _CIMultiDict = CIMultiDict[str] + _CIMultiDictProxy = CIMultiDictProxy[str] + _MultiDict = MultiDict[str] + _MultiDictProxy = MultiDictProxy[str] + from http.cookies import BaseCookie, Morsel + + from .web import Request, StreamResponse +else: + _CIMultiDict = CIMultiDict + _CIMultiDictProxy = CIMultiDictProxy + _MultiDict = MultiDict + _MultiDictProxy = MultiDictProxy + +Byteish = Union[bytes, bytearray, memoryview] +JSONEncoder = Callable[[Any], str] +JSONDecoder = Callable[[str], Any] +LooseHeaders = Union[ + Mapping[str, str], + Mapping[istr, str], + _CIMultiDict, + _CIMultiDictProxy, + Iterable[Tuple[Union[str, istr], str]], +] +RawHeaders = Tuple[Tuple[bytes, bytes], ...] +StrOrURL = Union[str, URL] + +LooseCookiesMappings = Mapping[str, Union[str, "BaseCookie[str]", "Morsel[Any]"]] +LooseCookiesIterables = Iterable[ + Tuple[str, Union[str, "BaseCookie[str]", "Morsel[Any]"]] +] +LooseCookies = Union[ + LooseCookiesMappings, + LooseCookiesIterables, + "BaseCookie[str]", +] + +Handler = Callable[["Request"], Awaitable["StreamResponse"]] + + +class Middleware(Protocol): + def __call__( + self, request: "Request", handler: Handler + ) -> Awaitable["StreamResponse"]: ... + + +PathLike = Union[str, "os.PathLike[str]"] diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py new file mode 100644 index 0000000000000000000000000000000000000000..26484b9483a9e7e49e1975a2002535009755e133 --- /dev/null +++ b/aiohttp/web_fileresponse.py @@ -0,0 +1,418 @@ +import asyncio +import io +import os +import pathlib +import sys +from contextlib import suppress +from enum import Enum, auto +from mimetypes import MimeTypes +from stat import S_ISREG +from types import MappingProxyType +from typing import ( # noqa + IO, + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Final, + Iterator, + List, + Optional, + Set, + Tuple, + Union, + cast, +) + +from . import hdrs +from .abc import AbstractStreamWriter +from .helpers import ETAG_ANY, ETag, must_be_empty_body +from .typedefs import LooseHeaders, PathLike +from .web_exceptions import ( + HTTPForbidden, + HTTPNotFound, + HTTPNotModified, + HTTPPartialContent, + HTTPPreconditionFailed, + HTTPRequestRangeNotSatisfiable, +) +from .web_response import StreamResponse + +__all__ = ("FileResponse",) + +if TYPE_CHECKING: + from .web_request import BaseRequest + + +_T_OnChunkSent = Optional[Callable[[bytes], Awaitable[None]]] + + +NOSENDFILE: Final[bool] = bool(os.environ.get("AIOHTTP_NOSENDFILE")) + +CONTENT_TYPES: Final[MimeTypes] = MimeTypes() + +# File extension to IANA encodings map that will be checked in the order defined. +ENCODING_EXTENSIONS = MappingProxyType( + {ext: CONTENT_TYPES.encodings_map[ext] for ext in (".br", ".gz")} +) + +FALLBACK_CONTENT_TYPE = "application/octet-stream" + +# Provide additional MIME type/extension pairs to be recognized. +# https://en.wikipedia.org/wiki/List_of_archive_formats#Compression_only +ADDITIONAL_CONTENT_TYPES = MappingProxyType( + { + "application/gzip": ".gz", + "application/x-brotli": ".br", + "application/x-bzip2": ".bz2", + "application/x-compress": ".Z", + "application/x-xz": ".xz", + } +) + + +class _FileResponseResult(Enum): + """The result of the file response.""" + + SEND_FILE = auto() # Ie a regular file to send + NOT_ACCEPTABLE = auto() # Ie a socket, or non-regular file + PRE_CONDITION_FAILED = auto() # Ie If-Match or If-None-Match failed + NOT_MODIFIED = auto() # 304 Not Modified + + +# Add custom pairs and clear the encodings map so guess_type ignores them. +CONTENT_TYPES.encodings_map.clear() +for content_type, extension in ADDITIONAL_CONTENT_TYPES.items(): + CONTENT_TYPES.add_type(content_type, extension) + + +_CLOSE_FUTURES: Set[asyncio.Future[None]] = set() + + +class FileResponse(StreamResponse): + """A response object can be used to send files.""" + + def __init__( + self, + path: PathLike, + chunk_size: int = 256 * 1024, + status: int = 200, + reason: Optional[str] = None, + headers: Optional[LooseHeaders] = None, + ) -> None: + super().__init__(status=status, reason=reason, headers=headers) + + self._path = pathlib.Path(path) + self._chunk_size = chunk_size + + def _seek_and_read(self, fobj: IO[Any], offset: int, chunk_size: int) -> bytes: + fobj.seek(offset) + return fobj.read(chunk_size) # type: ignore[no-any-return] + + async def _sendfile_fallback( + self, writer: AbstractStreamWriter, fobj: IO[Any], offset: int, count: int + ) -> AbstractStreamWriter: + # To keep memory usage low,fobj is transferred in chunks + # controlled by the constructor's chunk_size argument. + + chunk_size = self._chunk_size + loop = asyncio.get_event_loop() + chunk = await loop.run_in_executor( + None, self._seek_and_read, fobj, offset, chunk_size + ) + while chunk: + await writer.write(chunk) + count = count - chunk_size + if count <= 0: + break + chunk = await loop.run_in_executor(None, fobj.read, min(chunk_size, count)) + + await writer.drain() + return writer + + async def _sendfile( + self, request: "BaseRequest", fobj: IO[Any], offset: int, count: int + ) -> AbstractStreamWriter: + writer = await super().prepare(request) + assert writer is not None + + if NOSENDFILE or self.compression: + return await self._sendfile_fallback(writer, fobj, offset, count) + + loop = request._loop + transport = request.transport + assert transport is not None + + try: + await loop.sendfile(transport, fobj, offset, count) + except NotImplementedError: + return await self._sendfile_fallback(writer, fobj, offset, count) + + await super().write_eof() + return writer + + @staticmethod + def _etag_match(etag_value: str, etags: Tuple[ETag, ...], *, weak: bool) -> bool: + if len(etags) == 1 and etags[0].value == ETAG_ANY: + return True + return any( + etag.value == etag_value for etag in etags if weak or not etag.is_weak + ) + + async def _not_modified( + self, request: "BaseRequest", etag_value: str, last_modified: float + ) -> Optional[AbstractStreamWriter]: + self.set_status(HTTPNotModified.status_code) + self._length_check = False + self.etag = etag_value + self.last_modified = last_modified + # Delete any Content-Length headers provided by user. HTTP 304 + # should always have empty response body + return await super().prepare(request) + + async def _precondition_failed( + self, request: "BaseRequest" + ) -> Optional[AbstractStreamWriter]: + self.set_status(HTTPPreconditionFailed.status_code) + self.content_length = 0 + return await super().prepare(request) + + def _make_response( + self, request: "BaseRequest", accept_encoding: str + ) -> Tuple[ + _FileResponseResult, Optional[io.BufferedReader], os.stat_result, Optional[str] + ]: + """Return the response result, io object, stat result, and encoding. + + If an uncompressed file is returned, the encoding is set to + :py:data:`None`. + + This method should be called from a thread executor + since it calls os.stat which may block. + """ + file_path, st, file_encoding = self._get_file_path_stat_encoding( + accept_encoding + ) + if not file_path: + return _FileResponseResult.NOT_ACCEPTABLE, None, st, None + + etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" + + # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 + if (ifmatch := request.if_match) is not None and not self._etag_match( + etag_value, ifmatch, weak=False + ): + return _FileResponseResult.PRE_CONDITION_FAILED, None, st, file_encoding + + if ( + (unmodsince := request.if_unmodified_since) is not None + and ifmatch is None + and st.st_mtime > unmodsince.timestamp() + ): + return _FileResponseResult.PRE_CONDITION_FAILED, None, st, file_encoding + + # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2 + if (ifnonematch := request.if_none_match) is not None and self._etag_match( + etag_value, ifnonematch, weak=True + ): + return _FileResponseResult.NOT_MODIFIED, None, st, file_encoding + + if ( + (modsince := request.if_modified_since) is not None + and ifnonematch is None + and st.st_mtime <= modsince.timestamp() + ): + return _FileResponseResult.NOT_MODIFIED, None, st, file_encoding + + fobj = file_path.open("rb") + with suppress(OSError): + # fstat() may not be available on all platforms + # Once we open the file, we want the fstat() to ensure + # the file has not changed between the first stat() + # and the open(). + st = os.stat(fobj.fileno()) + return _FileResponseResult.SEND_FILE, fobj, st, file_encoding + + def _get_file_path_stat_encoding( + self, accept_encoding: str + ) -> Tuple[Optional[pathlib.Path], os.stat_result, Optional[str]]: + file_path = self._path + for file_extension, file_encoding in ENCODING_EXTENSIONS.items(): + if file_encoding not in accept_encoding: + continue + + compressed_path = file_path.with_suffix(file_path.suffix + file_extension) + with suppress(OSError): + # Do not follow symlinks and ignore any non-regular files. + st = compressed_path.lstat() + if S_ISREG(st.st_mode): + return compressed_path, st, file_encoding + + # Fallback to the uncompressed file + st = file_path.stat() + return file_path if S_ISREG(st.st_mode) else None, st, None + + async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter]: + loop = asyncio.get_running_loop() + # Encoding comparisons should be case-insensitive + # https://www.rfc-editor.org/rfc/rfc9110#section-8.4.1 + accept_encoding = request.headers.get(hdrs.ACCEPT_ENCODING, "").lower() + try: + response_result, fobj, st, file_encoding = await loop.run_in_executor( + None, self._make_response, request, accept_encoding + ) + except PermissionError: + self.set_status(HTTPForbidden.status_code) + return await super().prepare(request) + except OSError: + # Most likely to be FileNotFoundError or OSError for circular + # symlinks in python >= 3.13, so respond with 404. + self.set_status(HTTPNotFound.status_code) + return await super().prepare(request) + + # Forbid special files like sockets, pipes, devices, etc. + if response_result is _FileResponseResult.NOT_ACCEPTABLE: + self.set_status(HTTPForbidden.status_code) + return await super().prepare(request) + + if response_result is _FileResponseResult.PRE_CONDITION_FAILED: + return await self._precondition_failed(request) + + if response_result is _FileResponseResult.NOT_MODIFIED: + etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" + last_modified = st.st_mtime + return await self._not_modified(request, etag_value, last_modified) + + assert fobj is not None + try: + return await self._prepare_open_file(request, fobj, st, file_encoding) + finally: + # We do not await here because we do not want to wait + # for the executor to finish before returning the response + # so the connection can begin servicing another request + # as soon as possible. + close_future = loop.run_in_executor(None, fobj.close) + # Hold a strong reference to the future to prevent it from being + # garbage collected before it completes. + _CLOSE_FUTURES.add(close_future) + close_future.add_done_callback(_CLOSE_FUTURES.remove) + + async def _prepare_open_file( + self, + request: "BaseRequest", + fobj: io.BufferedReader, + st: os.stat_result, + file_encoding: Optional[str], + ) -> Optional[AbstractStreamWriter]: + status = self._status + file_size: int = st.st_size + file_mtime: float = st.st_mtime + count: int = file_size + start: Optional[int] = None + + if (ifrange := request.if_range) is None or file_mtime <= ifrange.timestamp(): + # If-Range header check: + # condition = cached date >= last modification date + # return 206 if True else 200. + # if False: + # Range header would not be processed, return 200 + # if True but Range header missing + # return 200 + try: + rng = request.http_range + start = rng.start + end: Optional[int] = rng.stop + except ValueError: + # https://tools.ietf.org/html/rfc7233: + # A server generating a 416 (Range Not Satisfiable) response to + # a byte-range request SHOULD send a Content-Range header field + # with an unsatisfied-range value. + # The complete-length in a 416 response indicates the current + # length of the selected representation. + # + # Will do the same below. Many servers ignore this and do not + # send a Content-Range header with HTTP 416 + self._headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" + self.set_status(HTTPRequestRangeNotSatisfiable.status_code) + return await super().prepare(request) + + # If a range request has been made, convert start, end slice + # notation into file pointer offset and count + if start is not None: + if start < 0 and end is None: # return tail of file + start += file_size + if start < 0: + # if Range:bytes=-1000 in request header but file size + # is only 200, there would be trouble without this + start = 0 + count = file_size - start + else: + # rfc7233:If the last-byte-pos value is + # absent, or if the value is greater than or equal to + # the current length of the representation data, + # the byte range is interpreted as the remainder + # of the representation (i.e., the server replaces the + # value of last-byte-pos with a value that is one less than + # the current length of the selected representation). + count = ( + min(end if end is not None else file_size, file_size) - start + ) + + if start >= file_size: + # HTTP 416 should be returned in this case. + # + # According to https://tools.ietf.org/html/rfc7233: + # If a valid byte-range-set includes at least one + # byte-range-spec with a first-byte-pos that is less than + # the current length of the representation, or at least one + # suffix-byte-range-spec with a non-zero suffix-length, + # then the byte-range-set is satisfiable. Otherwise, the + # byte-range-set is unsatisfiable. + self._headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" + self.set_status(HTTPRequestRangeNotSatisfiable.status_code) + return await super().prepare(request) + + status = HTTPPartialContent.status_code + # Even though you are sending the whole file, you should still + # return a HTTP 206 for a Range request. + self.set_status(status) + + # If the Content-Type header is not already set, guess it based on the + # extension of the request path. The encoding returned by guess_type + # can be ignored since the map was cleared above. + if hdrs.CONTENT_TYPE not in self._headers: + if sys.version_info >= (3, 13): + guesser = CONTENT_TYPES.guess_file_type + else: + guesser = CONTENT_TYPES.guess_type + self.content_type = guesser(self._path)[0] or FALLBACK_CONTENT_TYPE + + if file_encoding: + self._headers[hdrs.CONTENT_ENCODING] = file_encoding + self._headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING + # Disable compression if we are already sending + # a compressed file since we don't want to double + # compress. + self._compression = False + + self.etag = f"{st.st_mtime_ns:x}-{st.st_size:x}" + self.last_modified = file_mtime + self.content_length = count + + self._headers[hdrs.ACCEPT_RANGES] = "bytes" + + if status == HTTPPartialContent.status_code: + real_start = start + assert real_start is not None + self._headers[hdrs.CONTENT_RANGE] = "bytes {}-{}/{}".format( + real_start, real_start + count - 1, file_size + ) + + # If we are sending 0 bytes calling sendfile() will throw a ValueError + if count == 0 or must_be_empty_body(request.method, status): + return await super().prepare(request) + + # be aware that start could be None or int=0 here. + offset = start or 0 + + return await self._sendfile(request, fobj, offset, count) diff --git a/aiohttp/web_log.py b/aiohttp/web_log.py new file mode 100644 index 0000000000000000000000000000000000000000..d5ea2beeb152974ce5dd9f3e7990133ce04f7980 --- /dev/null +++ b/aiohttp/web_log.py @@ -0,0 +1,216 @@ +import datetime +import functools +import logging +import os +import re +import time as time_mod +from collections import namedtuple +from typing import Any, Callable, Dict, Iterable, List, Tuple # noqa + +from .abc import AbstractAccessLogger +from .web_request import BaseRequest +from .web_response import StreamResponse + +KeyMethod = namedtuple("KeyMethod", "key method") + + +class AccessLogger(AbstractAccessLogger): + """Helper object to log access. + + Usage: + log = logging.getLogger("spam") + log_format = "%a %{User-Agent}i" + access_logger = AccessLogger(log, log_format) + access_logger.log(request, response, time) + + Format: + %% The percent sign + %a Remote IP-address (IP-address of proxy if using reverse proxy) + %t Time when the request was started to process + %P The process ID of the child that serviced the request + %r First line of request + %s Response status code + %b Size of response in bytes, including HTTP headers + %T Time taken to serve the request, in seconds + %Tf Time taken to serve the request, in seconds with floating fraction + in .06f format + %D Time taken to serve the request, in microseconds + %{FOO}i request.headers['FOO'] + %{FOO}o response.headers['FOO'] + %{FOO}e os.environ['FOO'] + + """ + + LOG_FORMAT_MAP = { + "a": "remote_address", + "t": "request_start_time", + "P": "process_id", + "r": "first_request_line", + "s": "response_status", + "b": "response_size", + "T": "request_time", + "Tf": "request_time_frac", + "D": "request_time_micro", + "i": "request_header", + "o": "response_header", + } + + LOG_FORMAT = '%a %t "%r" %s %b "%{Referer}i" "%{User-Agent}i"' + FORMAT_RE = re.compile(r"%(\{([A-Za-z0-9\-_]+)\}([ioe])|[atPrsbOD]|Tf?)") + CLEANUP_RE = re.compile(r"(%[^s])") + _FORMAT_CACHE: Dict[str, Tuple[str, List[KeyMethod]]] = {} + + def __init__(self, logger: logging.Logger, log_format: str = LOG_FORMAT) -> None: + """Initialise the logger. + + logger is a logger object to be used for logging. + log_format is a string with apache compatible log format description. + + """ + super().__init__(logger, log_format=log_format) + + _compiled_format = AccessLogger._FORMAT_CACHE.get(log_format) + if not _compiled_format: + _compiled_format = self.compile_format(log_format) + AccessLogger._FORMAT_CACHE[log_format] = _compiled_format + + self._log_format, self._methods = _compiled_format + + def compile_format(self, log_format: str) -> Tuple[str, List[KeyMethod]]: + """Translate log_format into form usable by modulo formatting + + All known atoms will be replaced with %s + Also methods for formatting of those atoms will be added to + _methods in appropriate order + + For example we have log_format = "%a %t" + This format will be translated to "%s %s" + Also contents of _methods will be + [self._format_a, self._format_t] + These method will be called and results will be passed + to translated string format. + + Each _format_* method receive 'args' which is list of arguments + given to self.log + + Exceptions are _format_e, _format_i and _format_o methods which + also receive key name (by functools.partial) + + """ + # list of (key, method) tuples, we don't use an OrderedDict as users + # can repeat the same key more than once + methods = list() + + for atom in self.FORMAT_RE.findall(log_format): + if atom[1] == "": + format_key1 = self.LOG_FORMAT_MAP[atom[0]] + m = getattr(AccessLogger, "_format_%s" % atom[0]) + key_method = KeyMethod(format_key1, m) + else: + format_key2 = (self.LOG_FORMAT_MAP[atom[2]], atom[1]) + m = getattr(AccessLogger, "_format_%s" % atom[2]) + key_method = KeyMethod(format_key2, functools.partial(m, atom[1])) + + methods.append(key_method) + + log_format = self.FORMAT_RE.sub(r"%s", log_format) + log_format = self.CLEANUP_RE.sub(r"%\1", log_format) + return log_format, methods + + @staticmethod + def _format_i( + key: str, request: BaseRequest, response: StreamResponse, time: float + ) -> str: + if request is None: + return "(no headers)" + + # suboptimal, make istr(key) once + return request.headers.get(key, "-") + + @staticmethod + def _format_o( + key: str, request: BaseRequest, response: StreamResponse, time: float + ) -> str: + # suboptimal, make istr(key) once + return response.headers.get(key, "-") + + @staticmethod + def _format_a(request: BaseRequest, response: StreamResponse, time: float) -> str: + if request is None: + return "-" + ip = request.remote + return ip if ip is not None else "-" + + @staticmethod + def _format_t(request: BaseRequest, response: StreamResponse, time: float) -> str: + tz = datetime.timezone(datetime.timedelta(seconds=-time_mod.timezone)) + now = datetime.datetime.now(tz) + start_time = now - datetime.timedelta(seconds=time) + return start_time.strftime("[%d/%b/%Y:%H:%M:%S %z]") + + @staticmethod + def _format_P(request: BaseRequest, response: StreamResponse, time: float) -> str: + return "<%s>" % os.getpid() + + @staticmethod + def _format_r(request: BaseRequest, response: StreamResponse, time: float) -> str: + if request is None: + return "-" + return "{} {} HTTP/{}.{}".format( + request.method, + request.path_qs, + request.version.major, + request.version.minor, + ) + + @staticmethod + def _format_s(request: BaseRequest, response: StreamResponse, time: float) -> int: + return response.status + + @staticmethod + def _format_b(request: BaseRequest, response: StreamResponse, time: float) -> int: + return response.body_length + + @staticmethod + def _format_T(request: BaseRequest, response: StreamResponse, time: float) -> str: + return str(round(time)) + + @staticmethod + def _format_Tf(request: BaseRequest, response: StreamResponse, time: float) -> str: + return "%06f" % time + + @staticmethod + def _format_D(request: BaseRequest, response: StreamResponse, time: float) -> str: + return str(round(time * 1000000)) + + def _format_line( + self, request: BaseRequest, response: StreamResponse, time: float + ) -> Iterable[Tuple[str, Callable[[BaseRequest, StreamResponse, float], str]]]: + return [(key, method(request, response, time)) for key, method in self._methods] + + @property + def enabled(self) -> bool: + """Check if logger is enabled.""" + # Avoid formatting the log line if it will not be emitted. + return self.logger.isEnabledFor(logging.INFO) + + def log(self, request: BaseRequest, response: StreamResponse, time: float) -> None: + try: + fmt_info = self._format_line(request, response, time) + + values = list() + extra = dict() + for key, value in fmt_info: + values.append(value) + + if key.__class__ is str: + extra[key] = value + else: + k1, k2 = key # type: ignore[misc] + dct = extra.get(k1, {}) # type: ignore[var-annotated,has-type] + dct[k2] = value # type: ignore[index,has-type] + extra[k1] = dct # type: ignore[has-type,assignment] + + self.logger.info(self._log_format % tuple(values), extra=extra) + except Exception: + self.logger.exception("Error in logging") diff --git a/aiohttp/web_routedef.py b/aiohttp/web_routedef.py new file mode 100644 index 0000000000000000000000000000000000000000..f51b6cd00815a4daeabf7ef269a3225b2b764503 --- /dev/null +++ b/aiohttp/web_routedef.py @@ -0,0 +1,214 @@ +import abc +import os # noqa +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterator, + List, + Optional, + Sequence, + Type, + Union, + overload, +) + +import attr + +from . import hdrs +from .abc import AbstractView +from .typedefs import Handler, PathLike + +if TYPE_CHECKING: + from .web_request import Request + from .web_response import StreamResponse + from .web_urldispatcher import AbstractRoute, UrlDispatcher +else: + Request = StreamResponse = UrlDispatcher = AbstractRoute = None + + +__all__ = ( + "AbstractRouteDef", + "RouteDef", + "StaticDef", + "RouteTableDef", + "head", + "options", + "get", + "post", + "patch", + "put", + "delete", + "route", + "view", + "static", +) + + +class AbstractRouteDef(abc.ABC): + @abc.abstractmethod + def register(self, router: UrlDispatcher) -> List[AbstractRoute]: + pass # pragma: no cover + + +_HandlerType = Union[Type[AbstractView], Handler] + + +@attr.s(auto_attribs=True, frozen=True, repr=False, slots=True) +class RouteDef(AbstractRouteDef): + method: str + path: str + handler: _HandlerType + kwargs: Dict[str, Any] + + def __repr__(self) -> str: + info = [] + for name, value in sorted(self.kwargs.items()): + info.append(f", {name}={value!r}") + return " {handler.__name__!r}{info}>".format( + method=self.method, path=self.path, handler=self.handler, info="".join(info) + ) + + def register(self, router: UrlDispatcher) -> List[AbstractRoute]: + if self.method in hdrs.METH_ALL: + reg = getattr(router, "add_" + self.method.lower()) + return [reg(self.path, self.handler, **self.kwargs)] + else: + return [ + router.add_route(self.method, self.path, self.handler, **self.kwargs) + ] + + +@attr.s(auto_attribs=True, frozen=True, repr=False, slots=True) +class StaticDef(AbstractRouteDef): + prefix: str + path: PathLike + kwargs: Dict[str, Any] + + def __repr__(self) -> str: + info = [] + for name, value in sorted(self.kwargs.items()): + info.append(f", {name}={value!r}") + return " {path}{info}>".format( + prefix=self.prefix, path=self.path, info="".join(info) + ) + + def register(self, router: UrlDispatcher) -> List[AbstractRoute]: + resource = router.add_static(self.prefix, self.path, **self.kwargs) + routes = resource.get_info().get("routes", {}) + return list(routes.values()) + + +def route(method: str, path: str, handler: _HandlerType, **kwargs: Any) -> RouteDef: + return RouteDef(method, path, handler, kwargs) + + +def head(path: str, handler: _HandlerType, **kwargs: Any) -> RouteDef: + return route(hdrs.METH_HEAD, path, handler, **kwargs) + + +def options(path: str, handler: _HandlerType, **kwargs: Any) -> RouteDef: + return route(hdrs.METH_OPTIONS, path, handler, **kwargs) + + +def get( + path: str, + handler: _HandlerType, + *, + name: Optional[str] = None, + allow_head: bool = True, + **kwargs: Any, +) -> RouteDef: + return route( + hdrs.METH_GET, path, handler, name=name, allow_head=allow_head, **kwargs + ) + + +def post(path: str, handler: _HandlerType, **kwargs: Any) -> RouteDef: + return route(hdrs.METH_POST, path, handler, **kwargs) + + +def put(path: str, handler: _HandlerType, **kwargs: Any) -> RouteDef: + return route(hdrs.METH_PUT, path, handler, **kwargs) + + +def patch(path: str, handler: _HandlerType, **kwargs: Any) -> RouteDef: + return route(hdrs.METH_PATCH, path, handler, **kwargs) + + +def delete(path: str, handler: _HandlerType, **kwargs: Any) -> RouteDef: + return route(hdrs.METH_DELETE, path, handler, **kwargs) + + +def view(path: str, handler: Type[AbstractView], **kwargs: Any) -> RouteDef: + return route(hdrs.METH_ANY, path, handler, **kwargs) + + +def static(prefix: str, path: PathLike, **kwargs: Any) -> StaticDef: + return StaticDef(prefix, path, kwargs) + + +_Deco = Callable[[_HandlerType], _HandlerType] + + +class RouteTableDef(Sequence[AbstractRouteDef]): + """Route definition table""" + + def __init__(self) -> None: + self._items: List[AbstractRouteDef] = [] + + def __repr__(self) -> str: + return f"" + + @overload + def __getitem__(self, index: int) -> AbstractRouteDef: ... + + @overload + def __getitem__(self, index: slice) -> List[AbstractRouteDef]: ... + + def __getitem__(self, index): # type: ignore[no-untyped-def] + return self._items[index] + + def __iter__(self) -> Iterator[AbstractRouteDef]: + return iter(self._items) + + def __len__(self) -> int: + return len(self._items) + + def __contains__(self, item: object) -> bool: + return item in self._items + + def route(self, method: str, path: str, **kwargs: Any) -> _Deco: + def inner(handler: _HandlerType) -> _HandlerType: + self._items.append(RouteDef(method, path, handler, kwargs)) + return handler + + return inner + + def head(self, path: str, **kwargs: Any) -> _Deco: + return self.route(hdrs.METH_HEAD, path, **kwargs) + + def get(self, path: str, **kwargs: Any) -> _Deco: + return self.route(hdrs.METH_GET, path, **kwargs) + + def post(self, path: str, **kwargs: Any) -> _Deco: + return self.route(hdrs.METH_POST, path, **kwargs) + + def put(self, path: str, **kwargs: Any) -> _Deco: + return self.route(hdrs.METH_PUT, path, **kwargs) + + def patch(self, path: str, **kwargs: Any) -> _Deco: + return self.route(hdrs.METH_PATCH, path, **kwargs) + + def delete(self, path: str, **kwargs: Any) -> _Deco: + return self.route(hdrs.METH_DELETE, path, **kwargs) + + def options(self, path: str, **kwargs: Any) -> _Deco: + return self.route(hdrs.METH_OPTIONS, path, **kwargs) + + def view(self, path: str, **kwargs: Any) -> _Deco: + return self.route(hdrs.METH_ANY, path, **kwargs) + + def static(self, prefix: str, path: PathLike, **kwargs: Any) -> None: + self._items.append(StaticDef(prefix, path, kwargs)) diff --git a/bin/accelerate b/bin/accelerate new file mode 100644 index 0000000000000000000000000000000000000000..e488d15619100ac9f7e86ae5f099da39b6b37ed6 --- /dev/null +++ b/bin/accelerate @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from accelerate.commands.accelerate_cli import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/accelerate-config b/bin/accelerate-config new file mode 100644 index 0000000000000000000000000000000000000000..9e9bbc3397ee62a65977839c8051919fa18d47e4 --- /dev/null +++ b/bin/accelerate-config @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from accelerate.commands.config import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/accelerate-estimate-memory b/bin/accelerate-estimate-memory new file mode 100644 index 0000000000000000000000000000000000000000..81384690d5f560d26bd019544c5106ced63f9712 --- /dev/null +++ b/bin/accelerate-estimate-memory @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from accelerate.commands.estimate import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/accelerate-launch b/bin/accelerate-launch new file mode 100644 index 0000000000000000000000000000000000000000..c8a9db4ad0c8097b0d02a1f6614bad43b53b7d7c --- /dev/null +++ b/bin/accelerate-launch @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from accelerate.commands.launch import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/accelerate-merge-weights b/bin/accelerate-merge-weights new file mode 100644 index 0000000000000000000000000000000000000000..85511001ade8e5c36d7b96bf5703022b36277445 --- /dev/null +++ b/bin/accelerate-merge-weights @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from accelerate.commands.merge import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/datasets-cli b/bin/datasets-cli new file mode 100644 index 0000000000000000000000000000000000000000..422f6a8845ccc4cb617b8f1bcaf6782e02977692 --- /dev/null +++ b/bin/datasets-cli @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from datasets.commands.datasets_cli import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/diffusers-cli b/bin/diffusers-cli new file mode 100644 index 0000000000000000000000000000000000000000..73e21c8217f758f118acf6dd42c7047d7b540a85 --- /dev/null +++ b/bin/diffusers-cli @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from diffusers.commands.diffusers_cli import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/f2py b/bin/f2py new file mode 100644 index 0000000000000000000000000000000000000000..0ebc9f560b48be5bf044480fbd7efaf33fdddba8 --- /dev/null +++ b/bin/f2py @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from numpy.f2py.f2py2e import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/get_gprof b/bin/get_gprof new file mode 100644 index 0000000000000000000000000000000000000000..6def81b896a97969ff6c587c2e27965f580ce65b --- /dev/null +++ b/bin/get_gprof @@ -0,0 +1,75 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# +# Author: Mike McKerns (mmckerns @caltech and @uqfoundation) +# Copyright (c) 2008-2016 California Institute of Technology. +# Copyright (c) 2016-2025 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE +''' +build profile graph for the given instance + +running: + $ get_gprof + +executes: + gprof2dot -f pstats .prof | dot -Tpng -o .call.png + +where: + are arguments for gprof2dot, such as "-n 5 -e 5" + is code to create the instance to profile + is the class of the instance (i.e. type(instance)) + +For example: + $ get_gprof -n 5 -e 1 "import numpy; numpy.array([1,2])" + +will create 'ndarray.call.png' with the profile graph for numpy.array([1,2]), +where '-n 5' eliminates nodes below 5% threshold, similarly '-e 1' eliminates +edges below 1% threshold +''' + +if __name__ == "__main__": + import sys + if len(sys.argv) < 2: + print ("Please provide an object instance (e.g. 'import math; math.pi')") + sys.exit() + # grab args for gprof2dot + args = sys.argv[1:-1] + args = ' '.join(args) + # last arg builds the object + obj = sys.argv[-1] + obj = obj.split(';') + # multi-line prep for generating an instance + for line in obj[:-1]: + exec(line) + # one-line generation of an instance + try: + obj = eval(obj[-1]) + except Exception: + print ("Error processing object instance") + sys.exit() + + # get object 'name' + objtype = type(obj) + name = getattr(objtype, '__name__', getattr(objtype, '__class__', objtype)) + + # profile dumping an object + import dill + import os + import cProfile + #name = os.path.splitext(os.path.basename(__file__))[0] + cProfile.run("dill.dumps(obj)", filename="%s.prof" % name) + msg = "gprof2dot -f pstats %s %s.prof | dot -Tpng -o %s.call.png" % (args, name, name) + try: + res = os.system(msg) + except Exception: + print ("Please verify install of 'gprof2dot' to view profile graphs") + if res: + print ("Please verify install of 'gprof2dot' to view profile graphs") + + # get stats + f_prof = "%s.prof" % name + import pstats + stats = pstats.Stats(f_prof, stream=sys.stdout) + stats.strip_dirs().sort_stats('cumtime') + stats.print_stats(20) #XXX: save to file instead of print top 20? + os.remove(f_prof) diff --git a/bin/get_objgraph b/bin/get_objgraph new file mode 100644 index 0000000000000000000000000000000000000000..2b9179e557d5f9e7bb5d3a46d9854cf7b04fa90a --- /dev/null +++ b/bin/get_objgraph @@ -0,0 +1,54 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# +# Author: Mike McKerns (mmckerns @caltech and @uqfoundation) +# Copyright (c) 2008-2016 California Institute of Technology. +# Copyright (c) 2016-2025 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE +""" +display the reference paths for objects in ``dill.types`` or a .pkl file + +Notes: + the generated image is useful in showing the pointer references in + objects that are or can be pickled. Any object in ``dill.objects`` + listed in ``dill.load_types(picklable=True, unpicklable=True)`` works. + +Examples:: + + $ get_objgraph ArrayType + Image generated as ArrayType.png +""" + +import dill as pickle +#pickle.debug.trace(True) +#import pickle + +# get all objects for testing +from dill import load_types +load_types(pickleable=True,unpickleable=True) +from dill import objects + +if __name__ == "__main__": + import sys + if len(sys.argv) != 2: + print ("Please provide exactly one file or type name (e.g. 'IntType')") + msg = "\n" + for objtype in list(objects.keys())[:40]: + msg += objtype + ', ' + print (msg + "...") + else: + objtype = str(sys.argv[-1]) + try: + obj = objects[objtype] + except KeyError: + obj = pickle.load(open(objtype,'rb')) + import os + objtype = os.path.splitext(objtype)[0] + try: + import objgraph + objgraph.show_refs(obj, filename=objtype+'.png') + except ImportError: + print ("Please install 'objgraph' to view object graphs") + + +# EOF diff --git a/bin/hf b/bin/hf new file mode 100644 index 0000000000000000000000000000000000000000..9de37bb87e50efac6424bb59d391af6dcf60a879 --- /dev/null +++ b/bin/hf @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from huggingface_hub.cli.hf import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/httpx b/bin/httpx new file mode 100644 index 0000000000000000000000000000000000000000..933c333a43347d0d3faf493ee5fea64cc87b6042 --- /dev/null +++ b/bin/httpx @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from httpx import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/isympy b/bin/isympy new file mode 100644 index 0000000000000000000000000000000000000000..cb11fcecdf47b7af48a232657c899deaf681d252 --- /dev/null +++ b/bin/isympy @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from isympy import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/markdown-it b/bin/markdown-it new file mode 100644 index 0000000000000000000000000000000000000000..782416a14ef2e99a1597488268af21b2d6485524 --- /dev/null +++ b/bin/markdown-it @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from markdown_it.cli.parse import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/normalizer b/bin/normalizer new file mode 100644 index 0000000000000000000000000000000000000000..43b165df907e2f4041f6efaaaa22a3ee95524cf8 --- /dev/null +++ b/bin/normalizer @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from charset_normalizer.cli import cli_detect +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(cli_detect()) diff --git a/bin/proton b/bin/proton new file mode 100644 index 0000000000000000000000000000000000000000..1a53bd466a2bbdaa4c26c06c07eb69aac3bfeb0b --- /dev/null +++ b/bin/proton @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from triton.profiler.proton import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/proton-viewer b/bin/proton-viewer new file mode 100644 index 0000000000000000000000000000000000000000..04ea93429160d03dacb085ec88e82c650ea34ee1 --- /dev/null +++ b/bin/proton-viewer @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from triton.profiler.viewer import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/pygmentize b/bin/pygmentize new file mode 100644 index 0000000000000000000000000000000000000000..edfcc595b2afdab7d3291bf7388af9a2539d6195 --- /dev/null +++ b/bin/pygmentize @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from pygments.cmdline import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/tiny-agents b/bin/tiny-agents new file mode 100644 index 0000000000000000000000000000000000000000..d72caacf25a470afb3f15041c6d983ed889495f7 --- /dev/null +++ b/bin/tiny-agents @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from huggingface_hub.inference._mcp.cli import app +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(app()) diff --git a/bin/torchfrtrace b/bin/torchfrtrace new file mode 100644 index 0000000000000000000000000000000000000000..a3402b1a26efb75bc0d8bc05b0a558df78338a74 --- /dev/null +++ b/bin/torchfrtrace @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from torch.distributed.flight_recorder.fr_trace import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/torchrun b/bin/torchrun new file mode 100644 index 0000000000000000000000000000000000000000..f9bcec673c970a7d3bfd764b77044bda2a96f1be --- /dev/null +++ b/bin/torchrun @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from torch.distributed.run import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/tqdm b/bin/tqdm new file mode 100644 index 0000000000000000000000000000000000000000..02acad2052501e747b5742882f72565dbadd084e --- /dev/null +++ b/bin/tqdm @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from tqdm.cli import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/transformers b/bin/transformers new file mode 100644 index 0000000000000000000000000000000000000000..15b70348c494dc1c0883094cd66bb41619294de1 --- /dev/null +++ b/bin/transformers @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from transformers.cli.transformers import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/trl b/bin/trl new file mode 100644 index 0000000000000000000000000000000000000000..3d60eab3238fb9ee92c1c002384f19b4ba22bcd0 --- /dev/null +++ b/bin/trl @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from trl.cli import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/typer b/bin/typer new file mode 100644 index 0000000000000000000000000000000000000000..c1eb2c67a5552a522a16336bc6302a8dae26ad82 --- /dev/null +++ b/bin/typer @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from typer.cli import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/bin/undill b/bin/undill new file mode 100644 index 0000000000000000000000000000000000000000..750293cf0fde546f064816c64e51ab9225b8e08e --- /dev/null +++ b/bin/undill @@ -0,0 +1,22 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# +# Author: Mike McKerns (mmckerns @caltech and @uqfoundation) +# Copyright (c) 2008-2016 California Institute of Technology. +# Copyright (c) 2016-2025 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE +""" +unpickle the contents of a pickled object file + +Examples:: + + $ undill hello.pkl + ['hello', 'world'] +""" + +if __name__ == '__main__': + import sys + import dill + for file in sys.argv[1:]: + print (dill.load(open(file,'rb'))) + diff --git a/bin/unsloth b/bin/unsloth new file mode 100644 index 0000000000000000000000000000000000000000..3b5f780e66d197bea059f938e7ec1b4e441ff959 --- /dev/null +++ b/bin/unsloth @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from unsloth_cli import app +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(app()) diff --git a/bin/wheel b/bin/wheel new file mode 100644 index 0000000000000000000000000000000000000000..685dc6d7a1c9621cfbf0e6542c8ca9b56ff1a411 --- /dev/null +++ b/bin/wheel @@ -0,0 +1,10 @@ +#!/home/zeus/miniconda3/envs/cloudspace/bin/python +# -*- coding: utf-8 -*- +import sys +from wheel._commands import main +if __name__ == "__main__": + if sys.argv[0].endswith("-script.pyw"): + sys.argv[0] = sys.argv[0][:-11] + elif sys.argv[0].endswith(".exe"): + sys.argv[0] = sys.argv[0][:-4] + sys.exit(main()) diff --git a/frozenlist/__init__.py b/frozenlist/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..41c859588035a89d2706c37247f1d92ee0e7c5a7 --- /dev/null +++ b/frozenlist/__init__.py @@ -0,0 +1,86 @@ +import os +import types +from collections.abc import MutableSequence +from functools import total_ordering + +__version__ = "1.8.0" + +__all__ = ("FrozenList", "PyFrozenList") # type: Tuple[str, ...] + + +NO_EXTENSIONS = bool(os.environ.get("FROZENLIST_NO_EXTENSIONS")) # type: bool + + +@total_ordering +class FrozenList(MutableSequence): + __slots__ = ("_frozen", "_items") + __class_getitem__ = classmethod(types.GenericAlias) + + def __init__(self, items=None): + self._frozen = False + if items is not None: + items = list(items) + else: + items = [] + self._items = items + + @property + def frozen(self): + return self._frozen + + def freeze(self): + self._frozen = True + + def __getitem__(self, index): + return self._items[index] + + def __setitem__(self, index, value): + if self._frozen: + raise RuntimeError("Cannot modify frozen list.") + self._items[index] = value + + def __delitem__(self, index): + if self._frozen: + raise RuntimeError("Cannot modify frozen list.") + del self._items[index] + + def __len__(self): + return self._items.__len__() + + def __iter__(self): + return self._items.__iter__() + + def __reversed__(self): + return self._items.__reversed__() + + def __eq__(self, other): + return list(self) == other + + def __le__(self, other): + return list(self) <= other + + def insert(self, pos, item): + if self._frozen: + raise RuntimeError("Cannot modify frozen list.") + self._items.insert(pos, item) + + def __repr__(self): + return f"" + + def __hash__(self): + if self._frozen: + return hash(tuple(self)) + else: + raise RuntimeError("Cannot hash unfrozen list.") + + +PyFrozenList = FrozenList + + +if not NO_EXTENSIONS: + try: + from ._frozenlist import FrozenList as CFrozenList # type: ignore + except ImportError: # pragma: no cover + pass + else: + FrozenList = CFrozenList # type: ignore diff --git a/frozenlist/__init__.pyi b/frozenlist/__init__.pyi new file mode 100644 index 0000000000000000000000000000000000000000..ae803ef6aad72f57e7379db5a2044a95f214df7b --- /dev/null +++ b/frozenlist/__init__.pyi @@ -0,0 +1,47 @@ +from typing import ( + Generic, + Iterable, + Iterator, + List, + MutableSequence, + Optional, + TypeVar, + Union, + overload, +) + +_T = TypeVar("_T") +_Arg = Union[List[_T], Iterable[_T]] + +class FrozenList(MutableSequence[_T], Generic[_T]): + def __init__(self, items: Optional[_Arg[_T]] = None) -> None: ... + @property + def frozen(self) -> bool: ... + def freeze(self) -> None: ... + @overload + def __getitem__(self, i: int) -> _T: ... + @overload + def __getitem__(self, s: slice) -> FrozenList[_T]: ... + @overload + def __setitem__(self, i: int, o: _T) -> None: ... + @overload + def __setitem__(self, s: slice, o: Iterable[_T]) -> None: ... + @overload + def __delitem__(self, i: int) -> None: ... + @overload + def __delitem__(self, i: slice) -> None: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[_T]: ... + def __reversed__(self) -> Iterator[_T]: ... + def __eq__(self, other: object) -> bool: ... + def __le__(self, other: FrozenList[_T]) -> bool: ... + def __ne__(self, other: object) -> bool: ... + def __lt__(self, other: FrozenList[_T]) -> bool: ... + def __ge__(self, other: FrozenList[_T]) -> bool: ... + def __gt__(self, other: FrozenList[_T]) -> bool: ... + def insert(self, pos: int, item: _T) -> None: ... + def __repr__(self) -> str: ... + def __hash__(self) -> int: ... + +# types for C accelerators are the same +CFrozenList = PyFrozenList = FrozenList diff --git a/frozenlist/_frozenlist.pyx b/frozenlist/_frozenlist.pyx new file mode 100644 index 0000000000000000000000000000000000000000..a82d8c8ff6c909073deb5efd29ec5fa6075537fc --- /dev/null +++ b/frozenlist/_frozenlist.pyx @@ -0,0 +1,148 @@ +# cython: freethreading_compatible = True +# distutils: language = c++ + +from cpython.bool cimport PyBool_FromLong +from libcpp.atomic cimport atomic + +import copy +import types +from collections.abc import MutableSequence + + +cdef class FrozenList: + __class_getitem__ = classmethod(types.GenericAlias) + + cdef atomic[bint] _frozen + cdef list _items + + def __init__(self, items=None): + self._frozen.store(False) + if items is not None: + items = list(items) + else: + items = [] + self._items = items + + @property + def frozen(self): + return PyBool_FromLong(self._frozen.load()) + + cdef object _check_frozen(self): + if self._frozen.load(): + raise RuntimeError("Cannot modify frozen list.") + + cdef inline object _fast_len(self): + return len(self._items) + + def freeze(self): + self._frozen.store(True) + + def __getitem__(self, index): + return self._items[index] + + def __setitem__(self, index, value): + self._check_frozen() + self._items[index] = value + + def __delitem__(self, index): + self._check_frozen() + del self._items[index] + + def __len__(self): + return self._fast_len() + + def __iter__(self): + return self._items.__iter__() + + def __reversed__(self): + return self._items.__reversed__() + + def __richcmp__(self, other, op): + if op == 0: # < + return list(self) < other + if op == 1: # <= + return list(self) <= other + if op == 2: # == + return list(self) == other + if op == 3: # != + return list(self) != other + if op == 4: # > + return list(self) > other + if op == 5: # => + return list(self) >= other + + def insert(self, pos, item): + self._check_frozen() + self._items.insert(pos, item) + + def __contains__(self, item): + return item in self._items + + def __iadd__(self, items): + self._check_frozen() + self._items += list(items) + return self + + def index(self, item): + return self._items.index(item) + + def remove(self, item): + self._check_frozen() + self._items.remove(item) + + def clear(self): + self._check_frozen() + self._items.clear() + + def extend(self, items): + self._check_frozen() + self._items += list(items) + + def reverse(self): + self._check_frozen() + self._items.reverse() + + def pop(self, index=-1): + self._check_frozen() + return self._items.pop(index) + + def append(self, item): + self._check_frozen() + return self._items.append(item) + + def count(self, item): + return self._items.count(item) + + def __repr__(self): + return ''.format(self._frozen.load(), + self._items) + + def __hash__(self): + if self._frozen.load(): + return hash(tuple(self._items)) + else: + raise RuntimeError("Cannot hash unfrozen list.") + + def __deepcopy__(self, memo): + cdef FrozenList new_list + obj_id = id(self) + + # Return existing copy if already processed (circular reference) + if obj_id in memo: + return memo[obj_id] + + # Create new instance and register immediately + new_list = self.__class__([]) + memo[obj_id] = new_list + + # Deep copy items + new_list._items[:] = [copy.deepcopy(item, memo) for item in self._items] + + # Preserve frozen state + if self._frozen.load(): + new_list.freeze() + + return new_list + + +MutableSequence.register(FrozenList) diff --git a/frozenlist/py.typed b/frozenlist/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..f5642f79f21d872f010979dcf6f0c4a415acc19d --- /dev/null +++ b/frozenlist/py.typed @@ -0,0 +1 @@ +Marker diff --git a/httpcore/__init__.py b/httpcore/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a92dc4a440bdf6f259ec1083c89c817eb7b631b --- /dev/null +++ b/httpcore/__init__.py @@ -0,0 +1,141 @@ +from ._api import request, stream +from ._async import ( + AsyncConnectionInterface, + AsyncConnectionPool, + AsyncHTTP2Connection, + AsyncHTTP11Connection, + AsyncHTTPConnection, + AsyncHTTPProxy, + AsyncSOCKSProxy, +) +from ._backends.base import ( + SOCKET_OPTION, + AsyncNetworkBackend, + AsyncNetworkStream, + NetworkBackend, + NetworkStream, +) +from ._backends.mock import AsyncMockBackend, AsyncMockStream, MockBackend, MockStream +from ._backends.sync import SyncBackend +from ._exceptions import ( + ConnectError, + ConnectionNotAvailable, + ConnectTimeout, + LocalProtocolError, + NetworkError, + PoolTimeout, + ProtocolError, + ProxyError, + ReadError, + ReadTimeout, + RemoteProtocolError, + TimeoutException, + UnsupportedProtocol, + WriteError, + WriteTimeout, +) +from ._models import URL, Origin, Proxy, Request, Response +from ._ssl import default_ssl_context +from ._sync import ( + ConnectionInterface, + ConnectionPool, + HTTP2Connection, + HTTP11Connection, + HTTPConnection, + HTTPProxy, + SOCKSProxy, +) + +# The 'httpcore.AnyIOBackend' class is conditional on 'anyio' being installed. +try: + from ._backends.anyio import AnyIOBackend +except ImportError: # pragma: nocover + + class AnyIOBackend: # type: ignore + def __init__(self, *args, **kwargs): # type: ignore + msg = ( + "Attempted to use 'httpcore.AnyIOBackend' but 'anyio' is not installed." + ) + raise RuntimeError(msg) + + +# The 'httpcore.TrioBackend' class is conditional on 'trio' being installed. +try: + from ._backends.trio import TrioBackend +except ImportError: # pragma: nocover + + class TrioBackend: # type: ignore + def __init__(self, *args, **kwargs): # type: ignore + msg = "Attempted to use 'httpcore.TrioBackend' but 'trio' is not installed." + raise RuntimeError(msg) + + +__all__ = [ + # top-level requests + "request", + "stream", + # models + "Origin", + "URL", + "Request", + "Response", + "Proxy", + # async + "AsyncHTTPConnection", + "AsyncConnectionPool", + "AsyncHTTPProxy", + "AsyncHTTP11Connection", + "AsyncHTTP2Connection", + "AsyncConnectionInterface", + "AsyncSOCKSProxy", + # sync + "HTTPConnection", + "ConnectionPool", + "HTTPProxy", + "HTTP11Connection", + "HTTP2Connection", + "ConnectionInterface", + "SOCKSProxy", + # network backends, implementations + "SyncBackend", + "AnyIOBackend", + "TrioBackend", + # network backends, mock implementations + "AsyncMockBackend", + "AsyncMockStream", + "MockBackend", + "MockStream", + # network backends, interface + "AsyncNetworkStream", + "AsyncNetworkBackend", + "NetworkStream", + "NetworkBackend", + # util + "default_ssl_context", + "SOCKET_OPTION", + # exceptions + "ConnectionNotAvailable", + "ProxyError", + "ProtocolError", + "LocalProtocolError", + "RemoteProtocolError", + "UnsupportedProtocol", + "TimeoutException", + "PoolTimeout", + "ConnectTimeout", + "ReadTimeout", + "WriteTimeout", + "NetworkError", + "ConnectError", + "ReadError", + "WriteError", +] + +__version__ = "1.0.9" + + +__locals = locals() +for __name in __all__: + # Exclude SOCKET_OPTION, it causes AttributeError on Python 3.14 + if not __name.startswith(("__", "SOCKET_OPTION")): + setattr(__locals[__name], "__module__", "httpcore") # noqa diff --git a/httpcore/_api.py b/httpcore/_api.py new file mode 100644 index 0000000000000000000000000000000000000000..38b961d10de88bebc98c758d0d1f14af1e7c0370 --- /dev/null +++ b/httpcore/_api.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import contextlib +import typing + +from ._models import URL, Extensions, HeaderTypes, Response +from ._sync.connection_pool import ConnectionPool + + +def request( + method: bytes | str, + url: URL | bytes | str, + *, + headers: HeaderTypes = None, + content: bytes | typing.Iterator[bytes] | None = None, + extensions: Extensions | None = None, +) -> Response: + """ + Sends an HTTP request, returning the response. + + ``` + response = httpcore.request("GET", "https://www.example.com/") + ``` + + Arguments: + method: The HTTP method for the request. Typically one of `"GET"`, + `"OPTIONS"`, `"HEAD"`, `"POST"`, `"PUT"`, `"PATCH"`, or `"DELETE"`. + url: The URL of the HTTP request. Either as an instance of `httpcore.URL`, + or as str/bytes. + headers: The HTTP request headers. Either as a dictionary of str/bytes, + or as a list of two-tuples of str/bytes. + content: The content of the request body. Either as bytes, + or as a bytes iterator. + extensions: A dictionary of optional extra information included on the request. + Possible keys include `"timeout"`. + + Returns: + An instance of `httpcore.Response`. + """ + with ConnectionPool() as pool: + return pool.request( + method=method, + url=url, + headers=headers, + content=content, + extensions=extensions, + ) + + +@contextlib.contextmanager +def stream( + method: bytes | str, + url: URL | bytes | str, + *, + headers: HeaderTypes = None, + content: bytes | typing.Iterator[bytes] | None = None, + extensions: Extensions | None = None, +) -> typing.Iterator[Response]: + """ + Sends an HTTP request, returning the response within a content manager. + + ``` + with httpcore.stream("GET", "https://www.example.com/") as response: + ... + ``` + + When using the `stream()` function, the body of the response will not be + automatically read. If you want to access the response body you should + either use `content = response.read()`, or `for chunk in response.iter_content()`. + + Arguments: + method: The HTTP method for the request. Typically one of `"GET"`, + `"OPTIONS"`, `"HEAD"`, `"POST"`, `"PUT"`, `"PATCH"`, or `"DELETE"`. + url: The URL of the HTTP request. Either as an instance of `httpcore.URL`, + or as str/bytes. + headers: The HTTP request headers. Either as a dictionary of str/bytes, + or as a list of two-tuples of str/bytes. + content: The content of the request body. Either as bytes, + or as a bytes iterator. + extensions: A dictionary of optional extra information included on the request. + Possible keys include `"timeout"`. + + Returns: + An instance of `httpcore.Response`. + """ + with ConnectionPool() as pool: + with pool.stream( + method=method, + url=url, + headers=headers, + content=content, + extensions=extensions, + ) as response: + yield response diff --git a/httpcore/_exceptions.py b/httpcore/_exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..bc28d44f55bdc4b872951a74780469a3999d9ab4 --- /dev/null +++ b/httpcore/_exceptions.py @@ -0,0 +1,81 @@ +import contextlib +import typing + +ExceptionMapping = typing.Mapping[typing.Type[Exception], typing.Type[Exception]] + + +@contextlib.contextmanager +def map_exceptions(map: ExceptionMapping) -> typing.Iterator[None]: + try: + yield + except Exception as exc: # noqa: PIE786 + for from_exc, to_exc in map.items(): + if isinstance(exc, from_exc): + raise to_exc(exc) from exc + raise # pragma: nocover + + +class ConnectionNotAvailable(Exception): + pass + + +class ProxyError(Exception): + pass + + +class UnsupportedProtocol(Exception): + pass + + +class ProtocolError(Exception): + pass + + +class RemoteProtocolError(ProtocolError): + pass + + +class LocalProtocolError(ProtocolError): + pass + + +# Timeout errors + + +class TimeoutException(Exception): + pass + + +class PoolTimeout(TimeoutException): + pass + + +class ConnectTimeout(TimeoutException): + pass + + +class ReadTimeout(TimeoutException): + pass + + +class WriteTimeout(TimeoutException): + pass + + +# Network errors + + +class NetworkError(Exception): + pass + + +class ConnectError(NetworkError): + pass + + +class ReadError(NetworkError): + pass + + +class WriteError(NetworkError): + pass diff --git a/httpcore/_models.py b/httpcore/_models.py new file mode 100644 index 0000000000000000000000000000000000000000..8a65f13347d6621289a166d08123cbc8e1ad0157 --- /dev/null +++ b/httpcore/_models.py @@ -0,0 +1,516 @@ +from __future__ import annotations + +import base64 +import ssl +import typing +import urllib.parse + +# Functions for typechecking... + + +ByteOrStr = typing.Union[bytes, str] +HeadersAsSequence = typing.Sequence[typing.Tuple[ByteOrStr, ByteOrStr]] +HeadersAsMapping = typing.Mapping[ByteOrStr, ByteOrStr] +HeaderTypes = typing.Union[HeadersAsSequence, HeadersAsMapping, None] + +Extensions = typing.MutableMapping[str, typing.Any] + + +def enforce_bytes(value: bytes | str, *, name: str) -> bytes: + """ + Any arguments that are ultimately represented as bytes can be specified + either as bytes or as strings. + + However we enforce that any string arguments must only contain characters in + the plain ASCII range. chr(0)...chr(127). If you need to use characters + outside that range then be precise, and use a byte-wise argument. + """ + if isinstance(value, str): + try: + return value.encode("ascii") + except UnicodeEncodeError: + raise TypeError(f"{name} strings may not include unicode characters.") + elif isinstance(value, bytes): + return value + + seen_type = type(value).__name__ + raise TypeError(f"{name} must be bytes or str, but got {seen_type}.") + + +def enforce_url(value: URL | bytes | str, *, name: str) -> URL: + """ + Type check for URL parameters. + """ + if isinstance(value, (bytes, str)): + return URL(value) + elif isinstance(value, URL): + return value + + seen_type = type(value).__name__ + raise TypeError(f"{name} must be a URL, bytes, or str, but got {seen_type}.") + + +def enforce_headers( + value: HeadersAsMapping | HeadersAsSequence | None = None, *, name: str +) -> list[tuple[bytes, bytes]]: + """ + Convienence function that ensure all items in request or response headers + are either bytes or strings in the plain ASCII range. + """ + if value is None: + return [] + elif isinstance(value, typing.Mapping): + return [ + ( + enforce_bytes(k, name="header name"), + enforce_bytes(v, name="header value"), + ) + for k, v in value.items() + ] + elif isinstance(value, typing.Sequence): + return [ + ( + enforce_bytes(k, name="header name"), + enforce_bytes(v, name="header value"), + ) + for k, v in value + ] + + seen_type = type(value).__name__ + raise TypeError( + f"{name} must be a mapping or sequence of two-tuples, but got {seen_type}." + ) + + +def enforce_stream( + value: bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes] | None, + *, + name: str, +) -> typing.Iterable[bytes] | typing.AsyncIterable[bytes]: + if value is None: + return ByteStream(b"") + elif isinstance(value, bytes): + return ByteStream(value) + return value + + +# * https://tools.ietf.org/html/rfc3986#section-3.2.3 +# * https://url.spec.whatwg.org/#url-miscellaneous +# * https://url.spec.whatwg.org/#scheme-state +DEFAULT_PORTS = { + b"ftp": 21, + b"http": 80, + b"https": 443, + b"ws": 80, + b"wss": 443, +} + + +def include_request_headers( + headers: list[tuple[bytes, bytes]], + *, + url: "URL", + content: None | bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes], +) -> list[tuple[bytes, bytes]]: + headers_set = set(k.lower() for k, v in headers) + + if b"host" not in headers_set: + default_port = DEFAULT_PORTS.get(url.scheme) + if url.port is None or url.port == default_port: + header_value = url.host + else: + header_value = b"%b:%d" % (url.host, url.port) + headers = [(b"Host", header_value)] + headers + + if ( + content is not None + and b"content-length" not in headers_set + and b"transfer-encoding" not in headers_set + ): + if isinstance(content, bytes): + content_length = str(len(content)).encode("ascii") + headers += [(b"Content-Length", content_length)] + else: + headers += [(b"Transfer-Encoding", b"chunked")] # pragma: nocover + + return headers + + +# Interfaces for byte streams... + + +class ByteStream: + """ + A container for non-streaming content, and that supports both sync and async + stream iteration. + """ + + def __init__(self, content: bytes) -> None: + self._content = content + + def __iter__(self) -> typing.Iterator[bytes]: + yield self._content + + async def __aiter__(self) -> typing.AsyncIterator[bytes]: + yield self._content + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{len(self._content)} bytes]>" + + +class Origin: + def __init__(self, scheme: bytes, host: bytes, port: int) -> None: + self.scheme = scheme + self.host = host + self.port = port + + def __eq__(self, other: typing.Any) -> bool: + return ( + isinstance(other, Origin) + and self.scheme == other.scheme + and self.host == other.host + and self.port == other.port + ) + + def __str__(self) -> str: + scheme = self.scheme.decode("ascii") + host = self.host.decode("ascii") + port = str(self.port) + return f"{scheme}://{host}:{port}" + + +class URL: + """ + Represents the URL against which an HTTP request may be made. + + The URL may either be specified as a plain string, for convienence: + + ```python + url = httpcore.URL("https://www.example.com/") + ``` + + Or be constructed with explicitily pre-parsed components: + + ```python + url = httpcore.URL(scheme=b'https', host=b'www.example.com', port=None, target=b'/') + ``` + + Using this second more explicit style allows integrations that are using + `httpcore` to pass through URLs that have already been parsed in order to use + libraries such as `rfc-3986` rather than relying on the stdlib. It also ensures + that URL parsing is treated identically at both the networking level and at any + higher layers of abstraction. + + The four components are important here, as they allow the URL to be precisely + specified in a pre-parsed format. They also allow certain types of request to + be created that could not otherwise be expressed. + + For example, an HTTP request to `http://www.example.com/` forwarded via a proxy + at `http://localhost:8080`... + + ```python + # Constructs an HTTP request with a complete URL as the target: + # GET https://www.example.com/ HTTP/1.1 + url = httpcore.URL( + scheme=b'http', + host=b'localhost', + port=8080, + target=b'https://www.example.com/' + ) + request = httpcore.Request( + method="GET", + url=url + ) + ``` + + Another example is constructing an `OPTIONS *` request... + + ```python + # Constructs an 'OPTIONS *' HTTP request: + # OPTIONS * HTTP/1.1 + url = httpcore.URL(scheme=b'https', host=b'www.example.com', target=b'*') + request = httpcore.Request(method="OPTIONS", url=url) + ``` + + This kind of request is not possible to formulate with a URL string, + because the `/` delimiter is always used to demark the target from the + host/port portion of the URL. + + For convenience, string-like arguments may be specified either as strings or + as bytes. However, once a request is being issue over-the-wire, the URL + components are always ultimately required to be a bytewise representation. + + In order to avoid any ambiguity over character encodings, when strings are used + as arguments, they must be strictly limited to the ASCII range `chr(0)`-`chr(127)`. + If you require a bytewise representation that is outside this range you must + handle the character encoding directly, and pass a bytes instance. + """ + + def __init__( + self, + url: bytes | str = "", + *, + scheme: bytes | str = b"", + host: bytes | str = b"", + port: int | None = None, + target: bytes | str = b"", + ) -> None: + """ + Parameters: + url: The complete URL as a string or bytes. + scheme: The URL scheme as a string or bytes. + Typically either `"http"` or `"https"`. + host: The URL host as a string or bytes. Such as `"www.example.com"`. + port: The port to connect to. Either an integer or `None`. + target: The target of the HTTP request. Such as `"/items?search=red"`. + """ + if url: + parsed = urllib.parse.urlparse(enforce_bytes(url, name="url")) + self.scheme = parsed.scheme + self.host = parsed.hostname or b"" + self.port = parsed.port + self.target = (parsed.path or b"/") + ( + b"?" + parsed.query if parsed.query else b"" + ) + else: + self.scheme = enforce_bytes(scheme, name="scheme") + self.host = enforce_bytes(host, name="host") + self.port = port + self.target = enforce_bytes(target, name="target") + + @property + def origin(self) -> Origin: + default_port = { + b"http": 80, + b"https": 443, + b"ws": 80, + b"wss": 443, + b"socks5": 1080, + b"socks5h": 1080, + }[self.scheme] + return Origin( + scheme=self.scheme, host=self.host, port=self.port or default_port + ) + + def __eq__(self, other: typing.Any) -> bool: + return ( + isinstance(other, URL) + and other.scheme == self.scheme + and other.host == self.host + and other.port == self.port + and other.target == self.target + ) + + def __bytes__(self) -> bytes: + if self.port is None: + return b"%b://%b%b" % (self.scheme, self.host, self.target) + return b"%b://%b:%d%b" % (self.scheme, self.host, self.port, self.target) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(scheme={self.scheme!r}, " + f"host={self.host!r}, port={self.port!r}, target={self.target!r})" + ) + + +class Request: + """ + An HTTP request. + """ + + def __init__( + self, + method: bytes | str, + url: URL | bytes | str, + *, + headers: HeaderTypes = None, + content: bytes + | typing.Iterable[bytes] + | typing.AsyncIterable[bytes] + | None = None, + extensions: Extensions | None = None, + ) -> None: + """ + Parameters: + method: The HTTP request method, either as a string or bytes. + For example: `GET`. + url: The request URL, either as a `URL` instance, or as a string or bytes. + For example: `"https://www.example.com".` + headers: The HTTP request headers. + content: The content of the request body. + extensions: A dictionary of optional extra information included on + the request. Possible keys include `"timeout"`, and `"trace"`. + """ + self.method: bytes = enforce_bytes(method, name="method") + self.url: URL = enforce_url(url, name="url") + self.headers: list[tuple[bytes, bytes]] = enforce_headers( + headers, name="headers" + ) + self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = ( + enforce_stream(content, name="content") + ) + self.extensions = {} if extensions is None else extensions + + if "target" in self.extensions: + self.url = URL( + scheme=self.url.scheme, + host=self.url.host, + port=self.url.port, + target=self.extensions["target"], + ) + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.method!r}]>" + + +class Response: + """ + An HTTP response. + """ + + def __init__( + self, + status: int, + *, + headers: HeaderTypes = None, + content: bytes + | typing.Iterable[bytes] + | typing.AsyncIterable[bytes] + | None = None, + extensions: Extensions | None = None, + ) -> None: + """ + Parameters: + status: The HTTP status code of the response. For example `200`. + headers: The HTTP response headers. + content: The content of the response body. + extensions: A dictionary of optional extra information included on + the responseself.Possible keys include `"http_version"`, + `"reason_phrase"`, and `"network_stream"`. + """ + self.status: int = status + self.headers: list[tuple[bytes, bytes]] = enforce_headers( + headers, name="headers" + ) + self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = ( + enforce_stream(content, name="content") + ) + self.extensions = {} if extensions is None else extensions + + self._stream_consumed = False + + @property + def content(self) -> bytes: + if not hasattr(self, "_content"): + if isinstance(self.stream, typing.Iterable): + raise RuntimeError( + "Attempted to access 'response.content' on a streaming response. " + "Call 'response.read()' first." + ) + else: + raise RuntimeError( + "Attempted to access 'response.content' on a streaming response. " + "Call 'await response.aread()' first." + ) + return self._content + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.status}]>" + + # Sync interface... + + def read(self) -> bytes: + if not isinstance(self.stream, typing.Iterable): # pragma: nocover + raise RuntimeError( + "Attempted to read an asynchronous response using 'response.read()'. " + "You should use 'await response.aread()' instead." + ) + if not hasattr(self, "_content"): + self._content = b"".join([part for part in self.iter_stream()]) + return self._content + + def iter_stream(self) -> typing.Iterator[bytes]: + if not isinstance(self.stream, typing.Iterable): # pragma: nocover + raise RuntimeError( + "Attempted to stream an asynchronous response using 'for ... in " + "response.iter_stream()'. " + "You should use 'async for ... in response.aiter_stream()' instead." + ) + if self._stream_consumed: + raise RuntimeError( + "Attempted to call 'for ... in response.iter_stream()' more than once." + ) + self._stream_consumed = True + for chunk in self.stream: + yield chunk + + def close(self) -> None: + if not isinstance(self.stream, typing.Iterable): # pragma: nocover + raise RuntimeError( + "Attempted to close an asynchronous response using 'response.close()'. " + "You should use 'await response.aclose()' instead." + ) + if hasattr(self.stream, "close"): + self.stream.close() + + # Async interface... + + async def aread(self) -> bytes: + if not isinstance(self.stream, typing.AsyncIterable): # pragma: nocover + raise RuntimeError( + "Attempted to read an synchronous response using " + "'await response.aread()'. " + "You should use 'response.read()' instead." + ) + if not hasattr(self, "_content"): + self._content = b"".join([part async for part in self.aiter_stream()]) + return self._content + + async def aiter_stream(self) -> typing.AsyncIterator[bytes]: + if not isinstance(self.stream, typing.AsyncIterable): # pragma: nocover + raise RuntimeError( + "Attempted to stream an synchronous response using 'async for ... in " + "response.aiter_stream()'. " + "You should use 'for ... in response.iter_stream()' instead." + ) + if self._stream_consumed: + raise RuntimeError( + "Attempted to call 'async for ... in response.aiter_stream()' " + "more than once." + ) + self._stream_consumed = True + async for chunk in self.stream: + yield chunk + + async def aclose(self) -> None: + if not isinstance(self.stream, typing.AsyncIterable): # pragma: nocover + raise RuntimeError( + "Attempted to close a synchronous response using " + "'await response.aclose()'. " + "You should use 'response.close()' instead." + ) + if hasattr(self.stream, "aclose"): + await self.stream.aclose() + + +class Proxy: + def __init__( + self, + url: URL | bytes | str, + auth: tuple[bytes | str, bytes | str] | None = None, + headers: HeadersAsMapping | HeadersAsSequence | None = None, + ssl_context: ssl.SSLContext | None = None, + ): + self.url = enforce_url(url, name="url") + self.headers = enforce_headers(headers, name="headers") + self.ssl_context = ssl_context + + if auth is not None: + username = enforce_bytes(auth[0], name="auth") + password = enforce_bytes(auth[1], name="auth") + userpass = username + b":" + password + authorization = b"Basic " + base64.b64encode(userpass) + self.auth: tuple[bytes, bytes] | None = (username, password) + self.headers = [(b"Proxy-Authorization", authorization)] + self.headers + else: + self.auth = None diff --git a/httpcore/_ssl.py b/httpcore/_ssl.py new file mode 100644 index 0000000000000000000000000000000000000000..c99c5a67945b8a3a3544d481e979c791ab45fe23 --- /dev/null +++ b/httpcore/_ssl.py @@ -0,0 +1,9 @@ +import ssl + +import certifi + + +def default_ssl_context() -> ssl.SSLContext: + context = ssl.create_default_context() + context.load_verify_locations(certifi.where()) + return context diff --git a/httpcore/_synchronization.py b/httpcore/_synchronization.py new file mode 100644 index 0000000000000000000000000000000000000000..2ecc9e9c363e2f16c4f934cf41cf871826d6a495 --- /dev/null +++ b/httpcore/_synchronization.py @@ -0,0 +1,318 @@ +from __future__ import annotations + +import threading +import types + +from ._exceptions import ExceptionMapping, PoolTimeout, map_exceptions + +# Our async synchronization primatives use either 'anyio' or 'trio' depending +# on if they're running under asyncio or trio. + +try: + import trio +except (ImportError, NotImplementedError): # pragma: nocover + trio = None # type: ignore + +try: + import anyio +except ImportError: # pragma: nocover + anyio = None # type: ignore + + +def current_async_library() -> str: + # Determine if we're running under trio or asyncio. + # See https://sniffio.readthedocs.io/en/latest/ + try: + import sniffio + except ImportError: # pragma: nocover + environment = "asyncio" + else: + environment = sniffio.current_async_library() + + if environment not in ("asyncio", "trio"): # pragma: nocover + raise RuntimeError("Running under an unsupported async environment.") + + if environment == "asyncio" and anyio is None: # pragma: nocover + raise RuntimeError( + "Running with asyncio requires installation of 'httpcore[asyncio]'." + ) + + if environment == "trio" and trio is None: # pragma: nocover + raise RuntimeError( + "Running with trio requires installation of 'httpcore[trio]'." + ) + + return environment + + +class AsyncLock: + """ + This is a standard lock. + + In the sync case `Lock` provides thread locking. + In the async case `AsyncLock` provides async locking. + """ + + def __init__(self) -> None: + self._backend = "" + + def setup(self) -> None: + """ + Detect if we're running under 'asyncio' or 'trio' and create + a lock with the correct implementation. + """ + self._backend = current_async_library() + if self._backend == "trio": + self._trio_lock = trio.Lock() + elif self._backend == "asyncio": + self._anyio_lock = anyio.Lock() + + async def __aenter__(self) -> AsyncLock: + if not self._backend: + self.setup() + + if self._backend == "trio": + await self._trio_lock.acquire() + elif self._backend == "asyncio": + await self._anyio_lock.acquire() + + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + if self._backend == "trio": + self._trio_lock.release() + elif self._backend == "asyncio": + self._anyio_lock.release() + + +class AsyncThreadLock: + """ + This is a threading-only lock for no-I/O contexts. + + In the sync case `ThreadLock` provides thread locking. + In the async case `AsyncThreadLock` is a no-op. + """ + + def __enter__(self) -> AsyncThreadLock: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + pass + + +class AsyncEvent: + def __init__(self) -> None: + self._backend = "" + + def setup(self) -> None: + """ + Detect if we're running under 'asyncio' or 'trio' and create + a lock with the correct implementation. + """ + self._backend = current_async_library() + if self._backend == "trio": + self._trio_event = trio.Event() + elif self._backend == "asyncio": + self._anyio_event = anyio.Event() + + def set(self) -> None: + if not self._backend: + self.setup() + + if self._backend == "trio": + self._trio_event.set() + elif self._backend == "asyncio": + self._anyio_event.set() + + async def wait(self, timeout: float | None = None) -> None: + if not self._backend: + self.setup() + + if self._backend == "trio": + trio_exc_map: ExceptionMapping = {trio.TooSlowError: PoolTimeout} + timeout_or_inf = float("inf") if timeout is None else timeout + with map_exceptions(trio_exc_map): + with trio.fail_after(timeout_or_inf): + await self._trio_event.wait() + elif self._backend == "asyncio": + anyio_exc_map: ExceptionMapping = {TimeoutError: PoolTimeout} + with map_exceptions(anyio_exc_map): + with anyio.fail_after(timeout): + await self._anyio_event.wait() + + +class AsyncSemaphore: + def __init__(self, bound: int) -> None: + self._bound = bound + self._backend = "" + + def setup(self) -> None: + """ + Detect if we're running under 'asyncio' or 'trio' and create + a semaphore with the correct implementation. + """ + self._backend = current_async_library() + if self._backend == "trio": + self._trio_semaphore = trio.Semaphore( + initial_value=self._bound, max_value=self._bound + ) + elif self._backend == "asyncio": + self._anyio_semaphore = anyio.Semaphore( + initial_value=self._bound, max_value=self._bound + ) + + async def acquire(self) -> None: + if not self._backend: + self.setup() + + if self._backend == "trio": + await self._trio_semaphore.acquire() + elif self._backend == "asyncio": + await self._anyio_semaphore.acquire() + + async def release(self) -> None: + if self._backend == "trio": + self._trio_semaphore.release() + elif self._backend == "asyncio": + self._anyio_semaphore.release() + + +class AsyncShieldCancellation: + # For certain portions of our codebase where we're dealing with + # closing connections during exception handling we want to shield + # the operation from being cancelled. + # + # with AsyncShieldCancellation(): + # ... # clean-up operations, shielded from cancellation. + + def __init__(self) -> None: + """ + Detect if we're running under 'asyncio' or 'trio' and create + a shielded scope with the correct implementation. + """ + self._backend = current_async_library() + + if self._backend == "trio": + self._trio_shield = trio.CancelScope(shield=True) + elif self._backend == "asyncio": + self._anyio_shield = anyio.CancelScope(shield=True) + + def __enter__(self) -> AsyncShieldCancellation: + if self._backend == "trio": + self._trio_shield.__enter__() + elif self._backend == "asyncio": + self._anyio_shield.__enter__() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + if self._backend == "trio": + self._trio_shield.__exit__(exc_type, exc_value, traceback) + elif self._backend == "asyncio": + self._anyio_shield.__exit__(exc_type, exc_value, traceback) + + +# Our thread-based synchronization primitives... + + +class Lock: + """ + This is a standard lock. + + In the sync case `Lock` provides thread locking. + In the async case `AsyncLock` provides async locking. + """ + + def __init__(self) -> None: + self._lock = threading.Lock() + + def __enter__(self) -> Lock: + self._lock.acquire() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + self._lock.release() + + +class ThreadLock: + """ + This is a threading-only lock for no-I/O contexts. + + In the sync case `ThreadLock` provides thread locking. + In the async case `AsyncThreadLock` is a no-op. + """ + + def __init__(self) -> None: + self._lock = threading.Lock() + + def __enter__(self) -> ThreadLock: + self._lock.acquire() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + self._lock.release() + + +class Event: + def __init__(self) -> None: + self._event = threading.Event() + + def set(self) -> None: + self._event.set() + + def wait(self, timeout: float | None = None) -> None: + if timeout == float("inf"): # pragma: no cover + timeout = None + if not self._event.wait(timeout=timeout): + raise PoolTimeout() # pragma: nocover + + +class Semaphore: + def __init__(self, bound: int) -> None: + self._semaphore = threading.Semaphore(value=bound) + + def acquire(self) -> None: + self._semaphore.acquire() + + def release(self) -> None: + self._semaphore.release() + + +class ShieldCancellation: + # Thread-synchronous codebases don't support cancellation semantics. + # We have this class because we need to mirror the async and sync + # cases within our package, but it's just a no-op. + def __enter__(self) -> ShieldCancellation: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + pass diff --git a/httpcore/_trace.py b/httpcore/_trace.py new file mode 100644 index 0000000000000000000000000000000000000000..5f1cd7c47829ce17dbcf651ab56b4ffdce04a485 --- /dev/null +++ b/httpcore/_trace.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +import inspect +import logging +import types +import typing + +from ._models import Request + + +class Trace: + def __init__( + self, + name: str, + logger: logging.Logger, + request: Request | None = None, + kwargs: dict[str, typing.Any] | None = None, + ) -> None: + self.name = name + self.logger = logger + self.trace_extension = ( + None if request is None else request.extensions.get("trace") + ) + self.debug = self.logger.isEnabledFor(logging.DEBUG) + self.kwargs = kwargs or {} + self.return_value: typing.Any = None + self.should_trace = self.debug or self.trace_extension is not None + self.prefix = self.logger.name.split(".")[-1] + + def trace(self, name: str, info: dict[str, typing.Any]) -> None: + if self.trace_extension is not None: + prefix_and_name = f"{self.prefix}.{name}" + ret = self.trace_extension(prefix_and_name, info) + if inspect.iscoroutine(ret): # pragma: no cover + raise TypeError( + "If you are using a synchronous interface, " + "the callback of the `trace` extension should " + "be a normal function instead of an asynchronous function." + ) + + if self.debug: + if not info or "return_value" in info and info["return_value"] is None: + message = name + else: + args = " ".join([f"{key}={value!r}" for key, value in info.items()]) + message = f"{name} {args}" + self.logger.debug(message) + + def __enter__(self) -> Trace: + if self.should_trace: + info = self.kwargs + self.trace(f"{self.name}.started", info) + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + if self.should_trace: + if exc_value is None: + info = {"return_value": self.return_value} + self.trace(f"{self.name}.complete", info) + else: + info = {"exception": exc_value} + self.trace(f"{self.name}.failed", info) + + async def atrace(self, name: str, info: dict[str, typing.Any]) -> None: + if self.trace_extension is not None: + prefix_and_name = f"{self.prefix}.{name}" + coro = self.trace_extension(prefix_and_name, info) + if not inspect.iscoroutine(coro): # pragma: no cover + raise TypeError( + "If you're using an asynchronous interface, " + "the callback of the `trace` extension should " + "be an asynchronous function rather than a normal function." + ) + await coro + + if self.debug: + if not info or "return_value" in info and info["return_value"] is None: + message = name + else: + args = " ".join([f"{key}={value!r}" for key, value in info.items()]) + message = f"{name} {args}" + self.logger.debug(message) + + async def __aenter__(self) -> Trace: + if self.should_trace: + info = self.kwargs + await self.atrace(f"{self.name}.started", info) + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + if self.should_trace: + if exc_value is None: + info = {"return_value": self.return_value} + await self.atrace(f"{self.name}.complete", info) + else: + info = {"exception": exc_value} + await self.atrace(f"{self.name}.failed", info) diff --git a/httpcore/_utils.py b/httpcore/_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c44ff93cb2f572afc6e679308024b744b65c3b0a --- /dev/null +++ b/httpcore/_utils.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import select +import socket +import sys + + +def is_socket_readable(sock: socket.socket | None) -> bool: + """ + Return whether a socket, as identifed by its file descriptor, is readable. + "A socket is readable" means that the read buffer isn't empty, i.e. that calling + .recv() on it would immediately return some data. + """ + # NOTE: we want check for readability without actually attempting to read, because + # we don't want to block forever if it's not readable. + + # In the case that the socket no longer exists, or cannot return a file + # descriptor, we treat it as being readable, as if it the next read operation + # on it is ready to return the terminating `b""`. + sock_fd = None if sock is None else sock.fileno() + if sock_fd is None or sock_fd < 0: # pragma: nocover + return True + + # The implementation below was stolen from: + # https://github.com/python-trio/trio/blob/20ee2b1b7376db637435d80e266212a35837ddcc/trio/_socket.py#L471-L478 + # See also: https://github.com/encode/httpcore/pull/193#issuecomment-703129316 + + # Use select.select on Windows, and when poll is unavailable and select.poll + # everywhere else. (E.g. When eventlet is in use. See #327) + if ( + sys.platform == "win32" or getattr(select, "poll", None) is None + ): # pragma: nocover + rready, _, _ = select.select([sock_fd], [], [], 0) + return bool(rready) + p = select.poll() + p.register(sock_fd, select.POLLIN) + return bool(p.poll(0)) diff --git a/httpcore/py.typed b/httpcore/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/jinja2/__init__.py b/jinja2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a423a3eac16ac550e8e4008d7f5d79401b50e0f --- /dev/null +++ b/jinja2/__init__.py @@ -0,0 +1,38 @@ +"""Jinja is a template engine written in pure Python. It provides a +non-XML syntax that supports inline expressions and an optional +sandboxed environment. +""" + +from .bccache import BytecodeCache as BytecodeCache +from .bccache import FileSystemBytecodeCache as FileSystemBytecodeCache +from .bccache import MemcachedBytecodeCache as MemcachedBytecodeCache +from .environment import Environment as Environment +from .environment import Template as Template +from .exceptions import TemplateAssertionError as TemplateAssertionError +from .exceptions import TemplateError as TemplateError +from .exceptions import TemplateNotFound as TemplateNotFound +from .exceptions import TemplateRuntimeError as TemplateRuntimeError +from .exceptions import TemplatesNotFound as TemplatesNotFound +from .exceptions import TemplateSyntaxError as TemplateSyntaxError +from .exceptions import UndefinedError as UndefinedError +from .loaders import BaseLoader as BaseLoader +from .loaders import ChoiceLoader as ChoiceLoader +from .loaders import DictLoader as DictLoader +from .loaders import FileSystemLoader as FileSystemLoader +from .loaders import FunctionLoader as FunctionLoader +from .loaders import ModuleLoader as ModuleLoader +from .loaders import PackageLoader as PackageLoader +from .loaders import PrefixLoader as PrefixLoader +from .runtime import ChainableUndefined as ChainableUndefined +from .runtime import DebugUndefined as DebugUndefined +from .runtime import make_logging_undefined as make_logging_undefined +from .runtime import StrictUndefined as StrictUndefined +from .runtime import Undefined as Undefined +from .utils import clear_caches as clear_caches +from .utils import is_undefined as is_undefined +from .utils import pass_context as pass_context +from .utils import pass_environment as pass_environment +from .utils import pass_eval_context as pass_eval_context +from .utils import select_autoescape as select_autoescape + +__version__ = "3.1.6" diff --git a/jinja2/_identifier.py b/jinja2/_identifier.py new file mode 100644 index 0000000000000000000000000000000000000000..928c1503c7d414a8a86bbf5a82c68d42cb089bd2 --- /dev/null +++ b/jinja2/_identifier.py @@ -0,0 +1,6 @@ +import re + +# generated by scripts/generate_identifier_pattern.py +pattern = re.compile( + r"[\w·̀-ͯ·҃-֑҇-ׇֽֿׁׂׅׄؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ܑۭܰ-݊ަ-ް߫-߽߳ࠖ-࠙ࠛ-ࠣࠥ-ࠧࠩ-࡙࠭-࡛࣓-ࣣ࣡-ःऺ-़ा-ॏ॑-ॗॢॣঁ-ঃ়া-ৄেৈো-্ৗৢৣ৾ਁ-ਃ਼ਾ-ੂੇੈੋ-੍ੑੰੱੵઁ-ઃ઼ા-ૅે-ૉો-્ૢૣૺ-૿ଁ-ଃ଼ା-ୄେୈୋ-୍ୖୗୢୣஂா-ூெ-ைொ-்ௗఀ-ఄా-ౄె-ైొ-్ౕౖౢౣಁ-ಃ಼ಾ-ೄೆ-ೈೊ-್ೕೖೢೣഀ-ഃ഻഼ാ-ൄെ-ൈൊ-്ൗൢൣංඃ්ා-ුූෘ-ෟෲෳัิ-ฺ็-๎ັິ-ູົຼ່-ໍ༹༘༙༵༷༾༿ཱ-྄྆྇ྍ-ྗྙ-ྼ࿆ါ-ှၖ-ၙၞ-ၠၢ-ၤၧ-ၭၱ-ၴႂ-ႍႏႚ-ႝ፝-፟ᜒ-᜔ᜲ-᜴ᝒᝓᝲᝳ឴-៓៝᠋-᠍ᢅᢆᢩᤠ-ᤫᤰ-᤻ᨗ-ᨛᩕ-ᩞ᩠-᩿᩼᪰-᪽ᬀ-ᬄ᬴-᭄᭫-᭳ᮀ-ᮂᮡ-ᮭ᯦-᯳ᰤ-᰷᳐-᳔᳒-᳨᳭ᳲ-᳴᳷-᳹᷀-᷹᷻-᷿‿⁀⁔⃐-⃥⃜⃡-⃰℘℮⳯-⵿⳱ⷠ-〪ⷿ-゙゚〯꙯ꙴ-꙽ꚞꚟ꛰꛱ꠂ꠆ꠋꠣ-ꠧꢀꢁꢴ-ꣅ꣠-꣱ꣿꤦ-꤭ꥇ-꥓ꦀ-ꦃ꦳-꧀ꧥꨩ-ꨶꩃꩌꩍꩻ-ꩽꪰꪲ-ꪴꪷꪸꪾ꪿꫁ꫫ-ꫯꫵ꫶ꯣ-ꯪ꯬꯭ﬞ︀-️︠-︯︳︴﹍-﹏_𐇽𐋠𐍶-𐍺𐨁-𐨃𐨅𐨆𐨌-𐨏𐨸-𐨿𐨺𐫦𐫥𐴤-𐽆𐴧-𐽐𑀀-𑀂𑀸-𑁆𑁿-𑂂𑂰-𑂺𑄀-𑄂𑄧-𑄴𑅅𑅆𑅳𑆀-𑆂𑆳-𑇀𑇉-𑇌𑈬-𑈷𑈾𑋟-𑋪𑌀-𑌃𑌻𑌼𑌾-𑍄𑍇𑍈𑍋-𑍍𑍗𑍢𑍣𑍦-𑍬𑍰-𑍴𑐵-𑑆𑑞𑒰-𑓃𑖯-𑖵𑖸-𑗀𑗜𑗝𑘰-𑙀𑚫-𑚷𑜝-𑜫𑠬-𑠺𑨁-𑨊𑨳-𑨹𑨻-𑨾𑩇𑩑-𑩛𑪊-𑪙𑰯-𑰶𑰸-𑰿𑲒-𑲧𑲩-𑲶𑴱-𑴶𑴺𑴼𑴽𑴿-𑵅𑵇𑶊-𑶎𑶐𑶑𑶓-𑶗𑻳-𑻶𖫰-𖫴𖬰-𖬶𖽑-𖽾𖾏-𖾒𛲝𛲞𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄𝨀-𝨶𝨻-𝩬𝩵𝪄𝪛-𝪟𝪡-𝪯𞀀-𞀆𞀈-𞀘𞀛-𞀡𞀣𞀤𞀦-𞣐𞀪-𞣖𞥄-𞥊󠄀-󠇯]+" # noqa: B950 +) diff --git a/jinja2/async_utils.py b/jinja2/async_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f0c140205c50a3df9863ce1ab610b0c62a483f1b --- /dev/null +++ b/jinja2/async_utils.py @@ -0,0 +1,99 @@ +import inspect +import typing as t +from functools import WRAPPER_ASSIGNMENTS +from functools import wraps + +from .utils import _PassArg +from .utils import pass_eval_context + +if t.TYPE_CHECKING: + import typing_extensions as te + +V = t.TypeVar("V") + + +def async_variant(normal_func): # type: ignore + def decorator(async_func): # type: ignore + pass_arg = _PassArg.from_obj(normal_func) + need_eval_context = pass_arg is None + + if pass_arg is _PassArg.environment: + + def is_async(args: t.Any) -> bool: + return t.cast(bool, args[0].is_async) + + else: + + def is_async(args: t.Any) -> bool: + return t.cast(bool, args[0].environment.is_async) + + # Take the doc and annotations from the sync function, but the + # name from the async function. Pallets-Sphinx-Themes + # build_function_directive expects __wrapped__ to point to the + # sync function. + async_func_attrs = ("__module__", "__name__", "__qualname__") + normal_func_attrs = tuple(set(WRAPPER_ASSIGNMENTS).difference(async_func_attrs)) + + @wraps(normal_func, assigned=normal_func_attrs) + @wraps(async_func, assigned=async_func_attrs, updated=()) + def wrapper(*args, **kwargs): # type: ignore + b = is_async(args) + + if need_eval_context: + args = args[1:] + + if b: + return async_func(*args, **kwargs) + + return normal_func(*args, **kwargs) + + if need_eval_context: + wrapper = pass_eval_context(wrapper) + + wrapper.jinja_async_variant = True # type: ignore[attr-defined] + return wrapper + + return decorator + + +_common_primitives = {int, float, bool, str, list, dict, tuple, type(None)} + + +async def auto_await(value: t.Union[t.Awaitable["V"], "V"]) -> "V": + # Avoid a costly call to isawaitable + if type(value) in _common_primitives: + return t.cast("V", value) + + if inspect.isawaitable(value): + return await t.cast("t.Awaitable[V]", value) + + return value + + +class _IteratorToAsyncIterator(t.Generic[V]): + def __init__(self, iterator: "t.Iterator[V]"): + self._iterator = iterator + + def __aiter__(self) -> "te.Self": + return self + + async def __anext__(self) -> V: + try: + return next(self._iterator) + except StopIteration as e: + raise StopAsyncIteration(e.value) from e + + +def auto_aiter( + iterable: "t.Union[t.AsyncIterable[V], t.Iterable[V]]", +) -> "t.AsyncIterator[V]": + if hasattr(iterable, "__aiter__"): + return iterable.__aiter__() + else: + return _IteratorToAsyncIterator(iter(iterable)) + + +async def auto_to_list( + value: "t.Union[t.AsyncIterable[V], t.Iterable[V]]", +) -> t.List["V"]: + return [x async for x in auto_aiter(value)] diff --git a/jinja2/bccache.py b/jinja2/bccache.py new file mode 100644 index 0000000000000000000000000000000000000000..ada8b099ff251ea9c6da4c42e1383f37e359f06a --- /dev/null +++ b/jinja2/bccache.py @@ -0,0 +1,408 @@ +"""The optional bytecode cache system. This is useful if you have very +complex template situations and the compilation of all those templates +slows down your application too much. + +Situations where this is useful are often forking web applications that +are initialized on the first request. +""" + +import errno +import fnmatch +import marshal +import os +import pickle +import stat +import sys +import tempfile +import typing as t +from hashlib import sha1 +from io import BytesIO +from types import CodeType + +if t.TYPE_CHECKING: + import typing_extensions as te + + from .environment import Environment + + class _MemcachedClient(te.Protocol): + def get(self, key: str) -> bytes: ... + + def set( + self, key: str, value: bytes, timeout: t.Optional[int] = None + ) -> None: ... + + +bc_version = 5 +# Magic bytes to identify Jinja bytecode cache files. Contains the +# Python major and minor version to avoid loading incompatible bytecode +# if a project upgrades its Python version. +bc_magic = ( + b"j2" + + pickle.dumps(bc_version, 2) + + pickle.dumps((sys.version_info[0] << 24) | sys.version_info[1], 2) +) + + +class Bucket: + """Buckets are used to store the bytecode for one template. It's created + and initialized by the bytecode cache and passed to the loading functions. + + The buckets get an internal checksum from the cache assigned and use this + to automatically reject outdated cache material. Individual bytecode + cache subclasses don't have to care about cache invalidation. + """ + + def __init__(self, environment: "Environment", key: str, checksum: str) -> None: + self.environment = environment + self.key = key + self.checksum = checksum + self.reset() + + def reset(self) -> None: + """Resets the bucket (unloads the bytecode).""" + self.code: t.Optional[CodeType] = None + + def load_bytecode(self, f: t.BinaryIO) -> None: + """Loads bytecode from a file or file like object.""" + # make sure the magic header is correct + magic = f.read(len(bc_magic)) + if magic != bc_magic: + self.reset() + return + # the source code of the file changed, we need to reload + checksum = pickle.load(f) + if self.checksum != checksum: + self.reset() + return + # if marshal_load fails then we need to reload + try: + self.code = marshal.load(f) + except (EOFError, ValueError, TypeError): + self.reset() + return + + def write_bytecode(self, f: t.IO[bytes]) -> None: + """Dump the bytecode into the file or file like object passed.""" + if self.code is None: + raise TypeError("can't write empty bucket") + f.write(bc_magic) + pickle.dump(self.checksum, f, 2) + marshal.dump(self.code, f) + + def bytecode_from_string(self, string: bytes) -> None: + """Load bytecode from bytes.""" + self.load_bytecode(BytesIO(string)) + + def bytecode_to_string(self) -> bytes: + """Return the bytecode as bytes.""" + out = BytesIO() + self.write_bytecode(out) + return out.getvalue() + + +class BytecodeCache: + """To implement your own bytecode cache you have to subclass this class + and override :meth:`load_bytecode` and :meth:`dump_bytecode`. Both of + these methods are passed a :class:`~jinja2.bccache.Bucket`. + + A very basic bytecode cache that saves the bytecode on the file system:: + + from os import path + + class MyCache(BytecodeCache): + + def __init__(self, directory): + self.directory = directory + + def load_bytecode(self, bucket): + filename = path.join(self.directory, bucket.key) + if path.exists(filename): + with open(filename, 'rb') as f: + bucket.load_bytecode(f) + + def dump_bytecode(self, bucket): + filename = path.join(self.directory, bucket.key) + with open(filename, 'wb') as f: + bucket.write_bytecode(f) + + A more advanced version of a filesystem based bytecode cache is part of + Jinja. + """ + + def load_bytecode(self, bucket: Bucket) -> None: + """Subclasses have to override this method to load bytecode into a + bucket. If they are not able to find code in the cache for the + bucket, it must not do anything. + """ + raise NotImplementedError() + + def dump_bytecode(self, bucket: Bucket) -> None: + """Subclasses have to override this method to write the bytecode + from a bucket back to the cache. If it unable to do so it must not + fail silently but raise an exception. + """ + raise NotImplementedError() + + def clear(self) -> None: + """Clears the cache. This method is not used by Jinja but should be + implemented to allow applications to clear the bytecode cache used + by a particular environment. + """ + + def get_cache_key( + self, name: str, filename: t.Optional[t.Union[str]] = None + ) -> str: + """Returns the unique hash key for this template name.""" + hash = sha1(name.encode("utf-8")) + + if filename is not None: + hash.update(f"|{filename}".encode()) + + return hash.hexdigest() + + def get_source_checksum(self, source: str) -> str: + """Returns a checksum for the source.""" + return sha1(source.encode("utf-8")).hexdigest() + + def get_bucket( + self, + environment: "Environment", + name: str, + filename: t.Optional[str], + source: str, + ) -> Bucket: + """Return a cache bucket for the given template. All arguments are + mandatory but filename may be `None`. + """ + key = self.get_cache_key(name, filename) + checksum = self.get_source_checksum(source) + bucket = Bucket(environment, key, checksum) + self.load_bytecode(bucket) + return bucket + + def set_bucket(self, bucket: Bucket) -> None: + """Put the bucket into the cache.""" + self.dump_bytecode(bucket) + + +class FileSystemBytecodeCache(BytecodeCache): + """A bytecode cache that stores bytecode on the filesystem. It accepts + two arguments: The directory where the cache items are stored and a + pattern string that is used to build the filename. + + If no directory is specified a default cache directory is selected. On + Windows the user's temp directory is used, on UNIX systems a directory + is created for the user in the system temp directory. + + The pattern can be used to have multiple separate caches operate on the + same directory. The default pattern is ``'__jinja2_%s.cache'``. ``%s`` + is replaced with the cache key. + + >>> bcc = FileSystemBytecodeCache('/tmp/jinja_cache', '%s.cache') + + This bytecode cache supports clearing of the cache using the clear method. + """ + + def __init__( + self, directory: t.Optional[str] = None, pattern: str = "__jinja2_%s.cache" + ) -> None: + if directory is None: + directory = self._get_default_cache_dir() + self.directory = directory + self.pattern = pattern + + def _get_default_cache_dir(self) -> str: + def _unsafe_dir() -> "te.NoReturn": + raise RuntimeError( + "Cannot determine safe temp directory. You " + "need to explicitly provide one." + ) + + tmpdir = tempfile.gettempdir() + + # On windows the temporary directory is used specific unless + # explicitly forced otherwise. We can just use that. + if os.name == "nt": + return tmpdir + if not hasattr(os, "getuid"): + _unsafe_dir() + + dirname = f"_jinja2-cache-{os.getuid()}" + actual_dir = os.path.join(tmpdir, dirname) + + try: + os.mkdir(actual_dir, stat.S_IRWXU) + except OSError as e: + if e.errno != errno.EEXIST: + raise + try: + os.chmod(actual_dir, stat.S_IRWXU) + actual_dir_stat = os.lstat(actual_dir) + if ( + actual_dir_stat.st_uid != os.getuid() + or not stat.S_ISDIR(actual_dir_stat.st_mode) + or stat.S_IMODE(actual_dir_stat.st_mode) != stat.S_IRWXU + ): + _unsafe_dir() + except OSError as e: + if e.errno != errno.EEXIST: + raise + + actual_dir_stat = os.lstat(actual_dir) + if ( + actual_dir_stat.st_uid != os.getuid() + or not stat.S_ISDIR(actual_dir_stat.st_mode) + or stat.S_IMODE(actual_dir_stat.st_mode) != stat.S_IRWXU + ): + _unsafe_dir() + + return actual_dir + + def _get_cache_filename(self, bucket: Bucket) -> str: + return os.path.join(self.directory, self.pattern % (bucket.key,)) + + def load_bytecode(self, bucket: Bucket) -> None: + filename = self._get_cache_filename(bucket) + + # Don't test for existence before opening the file, since the + # file could disappear after the test before the open. + try: + f = open(filename, "rb") + except (FileNotFoundError, IsADirectoryError, PermissionError): + # PermissionError can occur on Windows when an operation is + # in progress, such as calling clear(). + return + + with f: + bucket.load_bytecode(f) + + def dump_bytecode(self, bucket: Bucket) -> None: + # Write to a temporary file, then rename to the real name after + # writing. This avoids another process reading the file before + # it is fully written. + name = self._get_cache_filename(bucket) + f = tempfile.NamedTemporaryFile( + mode="wb", + dir=os.path.dirname(name), + prefix=os.path.basename(name), + suffix=".tmp", + delete=False, + ) + + def remove_silent() -> None: + try: + os.remove(f.name) + except OSError: + # Another process may have called clear(). On Windows, + # another program may be holding the file open. + pass + + try: + with f: + bucket.write_bytecode(f) + except BaseException: + remove_silent() + raise + + try: + os.replace(f.name, name) + except OSError: + # Another process may have called clear(). On Windows, + # another program may be holding the file open. + remove_silent() + except BaseException: + remove_silent() + raise + + def clear(self) -> None: + # imported lazily here because google app-engine doesn't support + # write access on the file system and the function does not exist + # normally. + from os import remove + + files = fnmatch.filter(os.listdir(self.directory), self.pattern % ("*",)) + for filename in files: + try: + remove(os.path.join(self.directory, filename)) + except OSError: + pass + + +class MemcachedBytecodeCache(BytecodeCache): + """This class implements a bytecode cache that uses a memcache cache for + storing the information. It does not enforce a specific memcache library + (tummy's memcache or cmemcache) but will accept any class that provides + the minimal interface required. + + Libraries compatible with this class: + + - `cachelib `_ + - `python-memcached `_ + + (Unfortunately the django cache interface is not compatible because it + does not support storing binary data, only text. You can however pass + the underlying cache client to the bytecode cache which is available + as `django.core.cache.cache._client`.) + + The minimal interface for the client passed to the constructor is this: + + .. class:: MinimalClientInterface + + .. method:: set(key, value[, timeout]) + + Stores the bytecode in the cache. `value` is a string and + `timeout` the timeout of the key. If timeout is not provided + a default timeout or no timeout should be assumed, if it's + provided it's an integer with the number of seconds the cache + item should exist. + + .. method:: get(key) + + Returns the value for the cache key. If the item does not + exist in the cache the return value must be `None`. + + The other arguments to the constructor are the prefix for all keys that + is added before the actual cache key and the timeout for the bytecode in + the cache system. We recommend a high (or no) timeout. + + This bytecode cache does not support clearing of used items in the cache. + The clear method is a no-operation function. + + .. versionadded:: 2.7 + Added support for ignoring memcache errors through the + `ignore_memcache_errors` parameter. + """ + + def __init__( + self, + client: "_MemcachedClient", + prefix: str = "jinja2/bytecode/", + timeout: t.Optional[int] = None, + ignore_memcache_errors: bool = True, + ): + self.client = client + self.prefix = prefix + self.timeout = timeout + self.ignore_memcache_errors = ignore_memcache_errors + + def load_bytecode(self, bucket: Bucket) -> None: + try: + code = self.client.get(self.prefix + bucket.key) + except Exception: + if not self.ignore_memcache_errors: + raise + else: + bucket.bytecode_from_string(code) + + def dump_bytecode(self, bucket: Bucket) -> None: + key = self.prefix + bucket.key + value = bucket.bytecode_to_string() + + try: + if self.timeout is not None: + self.client.set(key, value, self.timeout) + else: + self.client.set(key, value) + except Exception: + if not self.ignore_memcache_errors: + raise diff --git a/jinja2/compiler.py b/jinja2/compiler.py new file mode 100644 index 0000000000000000000000000000000000000000..a4ff6a1b11af3e1a868d1a74c48d842390259b43 --- /dev/null +++ b/jinja2/compiler.py @@ -0,0 +1,1998 @@ +"""Compiles nodes from the parser into Python code.""" + +import typing as t +from contextlib import contextmanager +from functools import update_wrapper +from io import StringIO +from itertools import chain +from keyword import iskeyword as is_python_keyword + +from markupsafe import escape +from markupsafe import Markup + +from . import nodes +from .exceptions import TemplateAssertionError +from .idtracking import Symbols +from .idtracking import VAR_LOAD_ALIAS +from .idtracking import VAR_LOAD_PARAMETER +from .idtracking import VAR_LOAD_RESOLVE +from .idtracking import VAR_LOAD_UNDEFINED +from .nodes import EvalContext +from .optimizer import Optimizer +from .utils import _PassArg +from .utils import concat +from .visitor import NodeVisitor + +if t.TYPE_CHECKING: + import typing_extensions as te + + from .environment import Environment + +F = t.TypeVar("F", bound=t.Callable[..., t.Any]) + +operators = { + "eq": "==", + "ne": "!=", + "gt": ">", + "gteq": ">=", + "lt": "<", + "lteq": "<=", + "in": "in", + "notin": "not in", +} + + +def optimizeconst(f: F) -> F: + def new_func( + self: "CodeGenerator", node: nodes.Expr, frame: "Frame", **kwargs: t.Any + ) -> t.Any: + # Only optimize if the frame is not volatile + if self.optimizer is not None and not frame.eval_ctx.volatile: + new_node = self.optimizer.visit(node, frame.eval_ctx) + + if new_node != node: + return self.visit(new_node, frame) + + return f(self, node, frame, **kwargs) + + return update_wrapper(new_func, f) # type: ignore[return-value] + + +def _make_binop(op: str) -> t.Callable[["CodeGenerator", nodes.BinExpr, "Frame"], None]: + @optimizeconst + def visitor(self: "CodeGenerator", node: nodes.BinExpr, frame: Frame) -> None: + if ( + self.environment.sandboxed and op in self.environment.intercepted_binops # type: ignore + ): + self.write(f"environment.call_binop(context, {op!r}, ") + self.visit(node.left, frame) + self.write(", ") + self.visit(node.right, frame) + else: + self.write("(") + self.visit(node.left, frame) + self.write(f" {op} ") + self.visit(node.right, frame) + + self.write(")") + + return visitor + + +def _make_unop( + op: str, +) -> t.Callable[["CodeGenerator", nodes.UnaryExpr, "Frame"], None]: + @optimizeconst + def visitor(self: "CodeGenerator", node: nodes.UnaryExpr, frame: Frame) -> None: + if ( + self.environment.sandboxed and op in self.environment.intercepted_unops # type: ignore + ): + self.write(f"environment.call_unop(context, {op!r}, ") + self.visit(node.node, frame) + else: + self.write("(" + op) + self.visit(node.node, frame) + + self.write(")") + + return visitor + + +def generate( + node: nodes.Template, + environment: "Environment", + name: t.Optional[str], + filename: t.Optional[str], + stream: t.Optional[t.TextIO] = None, + defer_init: bool = False, + optimized: bool = True, +) -> t.Optional[str]: + """Generate the python source for a node tree.""" + if not isinstance(node, nodes.Template): + raise TypeError("Can't compile non template nodes") + + generator = environment.code_generator_class( + environment, name, filename, stream, defer_init, optimized + ) + generator.visit(node) + + if stream is None: + return generator.stream.getvalue() # type: ignore + + return None + + +def has_safe_repr(value: t.Any) -> bool: + """Does the node have a safe representation?""" + if value is None or value is NotImplemented or value is Ellipsis: + return True + + if type(value) in {bool, int, float, complex, range, str, Markup}: + return True + + if type(value) in {tuple, list, set, frozenset}: + return all(has_safe_repr(v) for v in value) + + if type(value) is dict: # noqa E721 + return all(has_safe_repr(k) and has_safe_repr(v) for k, v in value.items()) + + return False + + +def find_undeclared( + nodes: t.Iterable[nodes.Node], names: t.Iterable[str] +) -> t.Set[str]: + """Check if the names passed are accessed undeclared. The return value + is a set of all the undeclared names from the sequence of names found. + """ + visitor = UndeclaredNameVisitor(names) + try: + for node in nodes: + visitor.visit(node) + except VisitorExit: + pass + return visitor.undeclared + + +class MacroRef: + def __init__(self, node: t.Union[nodes.Macro, nodes.CallBlock]) -> None: + self.node = node + self.accesses_caller = False + self.accesses_kwargs = False + self.accesses_varargs = False + + +class Frame: + """Holds compile time information for us.""" + + def __init__( + self, + eval_ctx: EvalContext, + parent: t.Optional["Frame"] = None, + level: t.Optional[int] = None, + ) -> None: + self.eval_ctx = eval_ctx + + # the parent of this frame + self.parent = parent + + if parent is None: + self.symbols = Symbols(level=level) + + # in some dynamic inheritance situations the compiler needs to add + # write tests around output statements. + self.require_output_check = False + + # inside some tags we are using a buffer rather than yield statements. + # this for example affects {% filter %} or {% macro %}. If a frame + # is buffered this variable points to the name of the list used as + # buffer. + self.buffer: t.Optional[str] = None + + # the name of the block we're in, otherwise None. + self.block: t.Optional[str] = None + + else: + self.symbols = Symbols(parent.symbols, level=level) + self.require_output_check = parent.require_output_check + self.buffer = parent.buffer + self.block = parent.block + + # a toplevel frame is the root + soft frames such as if conditions. + self.toplevel = False + + # the root frame is basically just the outermost frame, so no if + # conditions. This information is used to optimize inheritance + # situations. + self.rootlevel = False + + # variables set inside of loops and blocks should not affect outer frames, + # but they still needs to be kept track of as part of the active context. + self.loop_frame = False + self.block_frame = False + + # track whether the frame is being used in an if-statement or conditional + # expression as it determines which errors should be raised during runtime + # or compile time. + self.soft_frame = False + + def copy(self) -> "te.Self": + """Create a copy of the current one.""" + rv = object.__new__(self.__class__) + rv.__dict__.update(self.__dict__) + rv.symbols = self.symbols.copy() + return rv + + def inner(self, isolated: bool = False) -> "Frame": + """Return an inner frame.""" + if isolated: + return Frame(self.eval_ctx, level=self.symbols.level + 1) + return Frame(self.eval_ctx, self) + + def soft(self) -> "te.Self": + """Return a soft frame. A soft frame may not be modified as + standalone thing as it shares the resources with the frame it + was created of, but it's not a rootlevel frame any longer. + + This is only used to implement if-statements and conditional + expressions. + """ + rv = self.copy() + rv.rootlevel = False + rv.soft_frame = True + return rv + + __copy__ = copy + + +class VisitorExit(RuntimeError): + """Exception used by the `UndeclaredNameVisitor` to signal a stop.""" + + +class DependencyFinderVisitor(NodeVisitor): + """A visitor that collects filter and test calls.""" + + def __init__(self) -> None: + self.filters: t.Set[str] = set() + self.tests: t.Set[str] = set() + + def visit_Filter(self, node: nodes.Filter) -> None: + self.generic_visit(node) + self.filters.add(node.name) + + def visit_Test(self, node: nodes.Test) -> None: + self.generic_visit(node) + self.tests.add(node.name) + + def visit_Block(self, node: nodes.Block) -> None: + """Stop visiting at blocks.""" + + +class UndeclaredNameVisitor(NodeVisitor): + """A visitor that checks if a name is accessed without being + declared. This is different from the frame visitor as it will + not stop at closure frames. + """ + + def __init__(self, names: t.Iterable[str]) -> None: + self.names = set(names) + self.undeclared: t.Set[str] = set() + + def visit_Name(self, node: nodes.Name) -> None: + if node.ctx == "load" and node.name in self.names: + self.undeclared.add(node.name) + if self.undeclared == self.names: + raise VisitorExit() + else: + self.names.discard(node.name) + + def visit_Block(self, node: nodes.Block) -> None: + """Stop visiting a blocks.""" + + +class CompilerExit(Exception): + """Raised if the compiler encountered a situation where it just + doesn't make sense to further process the code. Any block that + raises such an exception is not further processed. + """ + + +class CodeGenerator(NodeVisitor): + def __init__( + self, + environment: "Environment", + name: t.Optional[str], + filename: t.Optional[str], + stream: t.Optional[t.TextIO] = None, + defer_init: bool = False, + optimized: bool = True, + ) -> None: + if stream is None: + stream = StringIO() + self.environment = environment + self.name = name + self.filename = filename + self.stream = stream + self.created_block_context = False + self.defer_init = defer_init + self.optimizer: t.Optional[Optimizer] = None + + if optimized: + self.optimizer = Optimizer(environment) + + # aliases for imports + self.import_aliases: t.Dict[str, str] = {} + + # a registry for all blocks. Because blocks are moved out + # into the global python scope they are registered here + self.blocks: t.Dict[str, nodes.Block] = {} + + # the number of extends statements so far + self.extends_so_far = 0 + + # some templates have a rootlevel extends. In this case we + # can safely assume that we're a child template and do some + # more optimizations. + self.has_known_extends = False + + # the current line number + self.code_lineno = 1 + + # registry of all filters and tests (global, not block local) + self.tests: t.Dict[str, str] = {} + self.filters: t.Dict[str, str] = {} + + # the debug information + self.debug_info: t.List[t.Tuple[int, int]] = [] + self._write_debug_info: t.Optional[int] = None + + # the number of new lines before the next write() + self._new_lines = 0 + + # the line number of the last written statement + self._last_line = 0 + + # true if nothing was written so far. + self._first_write = True + + # used by the `temporary_identifier` method to get new + # unique, temporary identifier + self._last_identifier = 0 + + # the current indentation + self._indentation = 0 + + # Tracks toplevel assignments + self._assign_stack: t.List[t.Set[str]] = [] + + # Tracks parameter definition blocks + self._param_def_block: t.List[t.Set[str]] = [] + + # Tracks the current context. + self._context_reference_stack = ["context"] + + @property + def optimized(self) -> bool: + return self.optimizer is not None + + # -- Various compilation helpers + + def fail(self, msg: str, lineno: int) -> "te.NoReturn": + """Fail with a :exc:`TemplateAssertionError`.""" + raise TemplateAssertionError(msg, lineno, self.name, self.filename) + + def temporary_identifier(self) -> str: + """Get a new unique identifier.""" + self._last_identifier += 1 + return f"t_{self._last_identifier}" + + def buffer(self, frame: Frame) -> None: + """Enable buffering for the frame from that point onwards.""" + frame.buffer = self.temporary_identifier() + self.writeline(f"{frame.buffer} = []") + + def return_buffer_contents( + self, frame: Frame, force_unescaped: bool = False + ) -> None: + """Return the buffer contents of the frame.""" + if not force_unescaped: + if frame.eval_ctx.volatile: + self.writeline("if context.eval_ctx.autoescape:") + self.indent() + self.writeline(f"return Markup(concat({frame.buffer}))") + self.outdent() + self.writeline("else:") + self.indent() + self.writeline(f"return concat({frame.buffer})") + self.outdent() + return + elif frame.eval_ctx.autoescape: + self.writeline(f"return Markup(concat({frame.buffer}))") + return + self.writeline(f"return concat({frame.buffer})") + + def indent(self) -> None: + """Indent by one.""" + self._indentation += 1 + + def outdent(self, step: int = 1) -> None: + """Outdent by step.""" + self._indentation -= step + + def start_write(self, frame: Frame, node: t.Optional[nodes.Node] = None) -> None: + """Yield or write into the frame buffer.""" + if frame.buffer is None: + self.writeline("yield ", node) + else: + self.writeline(f"{frame.buffer}.append(", node) + + def end_write(self, frame: Frame) -> None: + """End the writing process started by `start_write`.""" + if frame.buffer is not None: + self.write(")") + + def simple_write( + self, s: str, frame: Frame, node: t.Optional[nodes.Node] = None + ) -> None: + """Simple shortcut for start_write + write + end_write.""" + self.start_write(frame, node) + self.write(s) + self.end_write(frame) + + def blockvisit(self, nodes: t.Iterable[nodes.Node], frame: Frame) -> None: + """Visit a list of nodes as block in a frame. If the current frame + is no buffer a dummy ``if 0: yield None`` is written automatically. + """ + try: + self.writeline("pass") + for node in nodes: + self.visit(node, frame) + except CompilerExit: + pass + + def write(self, x: str) -> None: + """Write a string into the output stream.""" + if self._new_lines: + if not self._first_write: + self.stream.write("\n" * self._new_lines) + self.code_lineno += self._new_lines + if self._write_debug_info is not None: + self.debug_info.append((self._write_debug_info, self.code_lineno)) + self._write_debug_info = None + self._first_write = False + self.stream.write(" " * self._indentation) + self._new_lines = 0 + self.stream.write(x) + + def writeline( + self, x: str, node: t.Optional[nodes.Node] = None, extra: int = 0 + ) -> None: + """Combination of newline and write.""" + self.newline(node, extra) + self.write(x) + + def newline(self, node: t.Optional[nodes.Node] = None, extra: int = 0) -> None: + """Add one or more newlines before the next write.""" + self._new_lines = max(self._new_lines, 1 + extra) + if node is not None and node.lineno != self._last_line: + self._write_debug_info = node.lineno + self._last_line = node.lineno + + def signature( + self, + node: t.Union[nodes.Call, nodes.Filter, nodes.Test], + frame: Frame, + extra_kwargs: t.Optional[t.Mapping[str, t.Any]] = None, + ) -> None: + """Writes a function call to the stream for the current node. + A leading comma is added automatically. The extra keyword + arguments may not include python keywords otherwise a syntax + error could occur. The extra keyword arguments should be given + as python dict. + """ + # if any of the given keyword arguments is a python keyword + # we have to make sure that no invalid call is created. + kwarg_workaround = any( + is_python_keyword(t.cast(str, k)) + for k in chain((x.key for x in node.kwargs), extra_kwargs or ()) + ) + + for arg in node.args: + self.write(", ") + self.visit(arg, frame) + + if not kwarg_workaround: + for kwarg in node.kwargs: + self.write(", ") + self.visit(kwarg, frame) + if extra_kwargs is not None: + for key, value in extra_kwargs.items(): + self.write(f", {key}={value}") + if node.dyn_args: + self.write(", *") + self.visit(node.dyn_args, frame) + + if kwarg_workaround: + if node.dyn_kwargs is not None: + self.write(", **dict({") + else: + self.write(", **{") + for kwarg in node.kwargs: + self.write(f"{kwarg.key!r}: ") + self.visit(kwarg.value, frame) + self.write(", ") + if extra_kwargs is not None: + for key, value in extra_kwargs.items(): + self.write(f"{key!r}: {value}, ") + if node.dyn_kwargs is not None: + self.write("}, **") + self.visit(node.dyn_kwargs, frame) + self.write(")") + else: + self.write("}") + + elif node.dyn_kwargs is not None: + self.write(", **") + self.visit(node.dyn_kwargs, frame) + + def pull_dependencies(self, nodes: t.Iterable[nodes.Node]) -> None: + """Find all filter and test names used in the template and + assign them to variables in the compiled namespace. Checking + that the names are registered with the environment is done when + compiling the Filter and Test nodes. If the node is in an If or + CondExpr node, the check is done at runtime instead. + + .. versionchanged:: 3.0 + Filters and tests in If and CondExpr nodes are checked at + runtime instead of compile time. + """ + visitor = DependencyFinderVisitor() + + for node in nodes: + visitor.visit(node) + + for id_map, names, dependency in ( + (self.filters, visitor.filters, "filters"), + ( + self.tests, + visitor.tests, + "tests", + ), + ): + for name in sorted(names): + if name not in id_map: + id_map[name] = self.temporary_identifier() + + # add check during runtime that dependencies used inside of executed + # blocks are defined, as this step may be skipped during compile time + self.writeline("try:") + self.indent() + self.writeline(f"{id_map[name]} = environment.{dependency}[{name!r}]") + self.outdent() + self.writeline("except KeyError:") + self.indent() + self.writeline("@internalcode") + self.writeline(f"def {id_map[name]}(*unused):") + self.indent() + self.writeline( + f'raise TemplateRuntimeError("No {dependency[:-1]}' + f' named {name!r} found.")' + ) + self.outdent() + self.outdent() + + def enter_frame(self, frame: Frame) -> None: + undefs = [] + for target, (action, param) in frame.symbols.loads.items(): + if action == VAR_LOAD_PARAMETER: + pass + elif action == VAR_LOAD_RESOLVE: + self.writeline(f"{target} = {self.get_resolve_func()}({param!r})") + elif action == VAR_LOAD_ALIAS: + self.writeline(f"{target} = {param}") + elif action == VAR_LOAD_UNDEFINED: + undefs.append(target) + else: + raise NotImplementedError("unknown load instruction") + if undefs: + self.writeline(f"{' = '.join(undefs)} = missing") + + def leave_frame(self, frame: Frame, with_python_scope: bool = False) -> None: + if not with_python_scope: + undefs = [] + for target in frame.symbols.loads: + undefs.append(target) + if undefs: + self.writeline(f"{' = '.join(undefs)} = missing") + + def choose_async(self, async_value: str = "async ", sync_value: str = "") -> str: + return async_value if self.environment.is_async else sync_value + + def func(self, name: str) -> str: + return f"{self.choose_async()}def {name}" + + def macro_body( + self, node: t.Union[nodes.Macro, nodes.CallBlock], frame: Frame + ) -> t.Tuple[Frame, MacroRef]: + """Dump the function def of a macro or call block.""" + frame = frame.inner() + frame.symbols.analyze_node(node) + macro_ref = MacroRef(node) + + explicit_caller = None + skip_special_params = set() + args = [] + + for idx, arg in enumerate(node.args): + if arg.name == "caller": + explicit_caller = idx + if arg.name in ("kwargs", "varargs"): + skip_special_params.add(arg.name) + args.append(frame.symbols.ref(arg.name)) + + undeclared = find_undeclared(node.body, ("caller", "kwargs", "varargs")) + + if "caller" in undeclared: + # In older Jinja versions there was a bug that allowed caller + # to retain the special behavior even if it was mentioned in + # the argument list. However thankfully this was only really + # working if it was the last argument. So we are explicitly + # checking this now and error out if it is anywhere else in + # the argument list. + if explicit_caller is not None: + try: + node.defaults[explicit_caller - len(node.args)] + except IndexError: + self.fail( + "When defining macros or call blocks the " + 'special "caller" argument must be omitted ' + "or be given a default.", + node.lineno, + ) + else: + args.append(frame.symbols.declare_parameter("caller")) + macro_ref.accesses_caller = True + if "kwargs" in undeclared and "kwargs" not in skip_special_params: + args.append(frame.symbols.declare_parameter("kwargs")) + macro_ref.accesses_kwargs = True + if "varargs" in undeclared and "varargs" not in skip_special_params: + args.append(frame.symbols.declare_parameter("varargs")) + macro_ref.accesses_varargs = True + + # macros are delayed, they never require output checks + frame.require_output_check = False + frame.symbols.analyze_node(node) + self.writeline(f"{self.func('macro')}({', '.join(args)}):", node) + self.indent() + + self.buffer(frame) + self.enter_frame(frame) + + self.push_parameter_definitions(frame) + for idx, arg in enumerate(node.args): + ref = frame.symbols.ref(arg.name) + self.writeline(f"if {ref} is missing:") + self.indent() + try: + default = node.defaults[idx - len(node.args)] + except IndexError: + self.writeline( + f'{ref} = undefined("parameter {arg.name!r} was not provided",' + f" name={arg.name!r})" + ) + else: + self.writeline(f"{ref} = ") + self.visit(default, frame) + self.mark_parameter_stored(ref) + self.outdent() + self.pop_parameter_definitions() + + self.blockvisit(node.body, frame) + self.return_buffer_contents(frame, force_unescaped=True) + self.leave_frame(frame, with_python_scope=True) + self.outdent() + + return frame, macro_ref + + def macro_def(self, macro_ref: MacroRef, frame: Frame) -> None: + """Dump the macro definition for the def created by macro_body.""" + arg_tuple = ", ".join(repr(x.name) for x in macro_ref.node.args) + name = getattr(macro_ref.node, "name", None) + if len(macro_ref.node.args) == 1: + arg_tuple += "," + self.write( + f"Macro(environment, macro, {name!r}, ({arg_tuple})," + f" {macro_ref.accesses_kwargs!r}, {macro_ref.accesses_varargs!r}," + f" {macro_ref.accesses_caller!r}, context.eval_ctx.autoescape)" + ) + + def position(self, node: nodes.Node) -> str: + """Return a human readable position for the node.""" + rv = f"line {node.lineno}" + if self.name is not None: + rv = f"{rv} in {self.name!r}" + return rv + + def dump_local_context(self, frame: Frame) -> str: + items_kv = ", ".join( + f"{name!r}: {target}" + for name, target in frame.symbols.dump_stores().items() + ) + return f"{{{items_kv}}}" + + def write_commons(self) -> None: + """Writes a common preamble that is used by root and block functions. + Primarily this sets up common local helpers and enforces a generator + through a dead branch. + """ + self.writeline("resolve = context.resolve_or_missing") + self.writeline("undefined = environment.undefined") + self.writeline("concat = environment.concat") + # always use the standard Undefined class for the implicit else of + # conditional expressions + self.writeline("cond_expr_undefined = Undefined") + self.writeline("if 0: yield None") + + def push_parameter_definitions(self, frame: Frame) -> None: + """Pushes all parameter targets from the given frame into a local + stack that permits tracking of yet to be assigned parameters. In + particular this enables the optimization from `visit_Name` to skip + undefined expressions for parameters in macros as macros can reference + otherwise unbound parameters. + """ + self._param_def_block.append(frame.symbols.dump_param_targets()) + + def pop_parameter_definitions(self) -> None: + """Pops the current parameter definitions set.""" + self._param_def_block.pop() + + def mark_parameter_stored(self, target: str) -> None: + """Marks a parameter in the current parameter definitions as stored. + This will skip the enforced undefined checks. + """ + if self._param_def_block: + self._param_def_block[-1].discard(target) + + def push_context_reference(self, target: str) -> None: + self._context_reference_stack.append(target) + + def pop_context_reference(self) -> None: + self._context_reference_stack.pop() + + def get_context_ref(self) -> str: + return self._context_reference_stack[-1] + + def get_resolve_func(self) -> str: + target = self._context_reference_stack[-1] + if target == "context": + return "resolve" + return f"{target}.resolve" + + def derive_context(self, frame: Frame) -> str: + return f"{self.get_context_ref()}.derived({self.dump_local_context(frame)})" + + def parameter_is_undeclared(self, target: str) -> bool: + """Checks if a given target is an undeclared parameter.""" + if not self._param_def_block: + return False + return target in self._param_def_block[-1] + + def push_assign_tracking(self) -> None: + """Pushes a new layer for assignment tracking.""" + self._assign_stack.append(set()) + + def pop_assign_tracking(self, frame: Frame) -> None: + """Pops the topmost level for assignment tracking and updates the + context variables if necessary. + """ + vars = self._assign_stack.pop() + if ( + not frame.block_frame + and not frame.loop_frame + and not frame.toplevel + or not vars + ): + return + public_names = [x for x in vars if x[:1] != "_"] + if len(vars) == 1: + name = next(iter(vars)) + ref = frame.symbols.ref(name) + if frame.loop_frame: + self.writeline(f"_loop_vars[{name!r}] = {ref}") + return + if frame.block_frame: + self.writeline(f"_block_vars[{name!r}] = {ref}") + return + self.writeline(f"context.vars[{name!r}] = {ref}") + else: + if frame.loop_frame: + self.writeline("_loop_vars.update({") + elif frame.block_frame: + self.writeline("_block_vars.update({") + else: + self.writeline("context.vars.update({") + for idx, name in enumerate(sorted(vars)): + if idx: + self.write(", ") + ref = frame.symbols.ref(name) + self.write(f"{name!r}: {ref}") + self.write("})") + if not frame.block_frame and not frame.loop_frame and public_names: + if len(public_names) == 1: + self.writeline(f"context.exported_vars.add({public_names[0]!r})") + else: + names_str = ", ".join(map(repr, sorted(public_names))) + self.writeline(f"context.exported_vars.update(({names_str}))") + + # -- Statement Visitors + + def visit_Template( + self, node: nodes.Template, frame: t.Optional[Frame] = None + ) -> None: + assert frame is None, "no root frame allowed" + eval_ctx = EvalContext(self.environment, self.name) + + from .runtime import async_exported + from .runtime import exported + + if self.environment.is_async: + exported_names = sorted(exported + async_exported) + else: + exported_names = sorted(exported) + + self.writeline("from jinja2.runtime import " + ", ".join(exported_names)) + + # if we want a deferred initialization we cannot move the + # environment into a local name + envenv = "" if self.defer_init else ", environment=environment" + + # do we have an extends tag at all? If not, we can save some + # overhead by just not processing any inheritance code. + have_extends = node.find(nodes.Extends) is not None + + # find all blocks + for block in node.find_all(nodes.Block): + if block.name in self.blocks: + self.fail(f"block {block.name!r} defined twice", block.lineno) + self.blocks[block.name] = block + + # find all imports and import them + for import_ in node.find_all(nodes.ImportedName): + if import_.importname not in self.import_aliases: + imp = import_.importname + self.import_aliases[imp] = alias = self.temporary_identifier() + if "." in imp: + module, obj = imp.rsplit(".", 1) + self.writeline(f"from {module} import {obj} as {alias}") + else: + self.writeline(f"import {imp} as {alias}") + + # add the load name + self.writeline(f"name = {self.name!r}") + + # generate the root render function. + self.writeline( + f"{self.func('root')}(context, missing=missing{envenv}):", extra=1 + ) + self.indent() + self.write_commons() + + # process the root + frame = Frame(eval_ctx) + if "self" in find_undeclared(node.body, ("self",)): + ref = frame.symbols.declare_parameter("self") + self.writeline(f"{ref} = TemplateReference(context)") + frame.symbols.analyze_node(node) + frame.toplevel = frame.rootlevel = True + frame.require_output_check = have_extends and not self.has_known_extends + if have_extends: + self.writeline("parent_template = None") + self.enter_frame(frame) + self.pull_dependencies(node.body) + self.blockvisit(node.body, frame) + self.leave_frame(frame, with_python_scope=True) + self.outdent() + + # make sure that the parent root is called. + if have_extends: + if not self.has_known_extends: + self.indent() + self.writeline("if parent_template is not None:") + self.indent() + if not self.environment.is_async: + self.writeline("yield from parent_template.root_render_func(context)") + else: + self.writeline("agen = parent_template.root_render_func(context)") + self.writeline("try:") + self.indent() + self.writeline("async for event in agen:") + self.indent() + self.writeline("yield event") + self.outdent() + self.outdent() + self.writeline("finally: await agen.aclose()") + self.outdent(1 + (not self.has_known_extends)) + + # at this point we now have the blocks collected and can visit them too. + for name, block in self.blocks.items(): + self.writeline( + f"{self.func('block_' + name)}(context, missing=missing{envenv}):", + block, + 1, + ) + self.indent() + self.write_commons() + # It's important that we do not make this frame a child of the + # toplevel template. This would cause a variety of + # interesting issues with identifier tracking. + block_frame = Frame(eval_ctx) + block_frame.block_frame = True + undeclared = find_undeclared(block.body, ("self", "super")) + if "self" in undeclared: + ref = block_frame.symbols.declare_parameter("self") + self.writeline(f"{ref} = TemplateReference(context)") + if "super" in undeclared: + ref = block_frame.symbols.declare_parameter("super") + self.writeline(f"{ref} = context.super({name!r}, block_{name})") + block_frame.symbols.analyze_node(block) + block_frame.block = name + self.writeline("_block_vars = {}") + self.enter_frame(block_frame) + self.pull_dependencies(block.body) + self.blockvisit(block.body, block_frame) + self.leave_frame(block_frame, with_python_scope=True) + self.outdent() + + blocks_kv_str = ", ".join(f"{x!r}: block_{x}" for x in self.blocks) + self.writeline(f"blocks = {{{blocks_kv_str}}}", extra=1) + debug_kv_str = "&".join(f"{k}={v}" for k, v in self.debug_info) + self.writeline(f"debug_info = {debug_kv_str!r}") + + def visit_Block(self, node: nodes.Block, frame: Frame) -> None: + """Call a block and register it for the template.""" + level = 0 + if frame.toplevel: + # if we know that we are a child template, there is no need to + # check if we are one + if self.has_known_extends: + return + if self.extends_so_far > 0: + self.writeline("if parent_template is None:") + self.indent() + level += 1 + + if node.scoped: + context = self.derive_context(frame) + else: + context = self.get_context_ref() + + if node.required: + self.writeline(f"if len(context.blocks[{node.name!r}]) <= 1:", node) + self.indent() + self.writeline( + f'raise TemplateRuntimeError("Required block {node.name!r} not found")', + node, + ) + self.outdent() + + if not self.environment.is_async and frame.buffer is None: + self.writeline( + f"yield from context.blocks[{node.name!r}][0]({context})", node + ) + else: + self.writeline(f"gen = context.blocks[{node.name!r}][0]({context})") + self.writeline("try:") + self.indent() + self.writeline( + f"{self.choose_async()}for event in gen:", + node, + ) + self.indent() + self.simple_write("event", frame) + self.outdent() + self.outdent() + self.writeline( + f"finally: {self.choose_async('await gen.aclose()', 'gen.close()')}" + ) + + self.outdent(level) + + def visit_Extends(self, node: nodes.Extends, frame: Frame) -> None: + """Calls the extender.""" + if not frame.toplevel: + self.fail("cannot use extend from a non top-level scope", node.lineno) + + # if the number of extends statements in general is zero so + # far, we don't have to add a check if something extended + # the template before this one. + if self.extends_so_far > 0: + # if we have a known extends we just add a template runtime + # error into the generated code. We could catch that at compile + # time too, but i welcome it not to confuse users by throwing the + # same error at different times just "because we can". + if not self.has_known_extends: + self.writeline("if parent_template is not None:") + self.indent() + self.writeline('raise TemplateRuntimeError("extended multiple times")') + + # if we have a known extends already we don't need that code here + # as we know that the template execution will end here. + if self.has_known_extends: + raise CompilerExit() + else: + self.outdent() + + self.writeline("parent_template = environment.get_template(", node) + self.visit(node.template, frame) + self.write(f", {self.name!r})") + self.writeline("for name, parent_block in parent_template.blocks.items():") + self.indent() + self.writeline("context.blocks.setdefault(name, []).append(parent_block)") + self.outdent() + + # if this extends statement was in the root level we can take + # advantage of that information and simplify the generated code + # in the top level from this point onwards + if frame.rootlevel: + self.has_known_extends = True + + # and now we have one more + self.extends_so_far += 1 + + def visit_Include(self, node: nodes.Include, frame: Frame) -> None: + """Handles includes.""" + if node.ignore_missing: + self.writeline("try:") + self.indent() + + func_name = "get_or_select_template" + if isinstance(node.template, nodes.Const): + if isinstance(node.template.value, str): + func_name = "get_template" + elif isinstance(node.template.value, (tuple, list)): + func_name = "select_template" + elif isinstance(node.template, (nodes.Tuple, nodes.List)): + func_name = "select_template" + + self.writeline(f"template = environment.{func_name}(", node) + self.visit(node.template, frame) + self.write(f", {self.name!r})") + if node.ignore_missing: + self.outdent() + self.writeline("except TemplateNotFound:") + self.indent() + self.writeline("pass") + self.outdent() + self.writeline("else:") + self.indent() + + def loop_body() -> None: + self.indent() + self.simple_write("event", frame) + self.outdent() + + if node.with_context: + self.writeline( + f"gen = template.root_render_func(" + "template.new_context(context.get_all(), True," + f" {self.dump_local_context(frame)}))" + ) + self.writeline("try:") + self.indent() + self.writeline(f"{self.choose_async()}for event in gen:") + loop_body() + self.outdent() + self.writeline( + f"finally: {self.choose_async('await gen.aclose()', 'gen.close()')}" + ) + elif self.environment.is_async: + self.writeline( + "for event in (await template._get_default_module_async())" + "._body_stream:" + ) + loop_body() + else: + self.writeline("yield from template._get_default_module()._body_stream") + + if node.ignore_missing: + self.outdent() + + def _import_common( + self, node: t.Union[nodes.Import, nodes.FromImport], frame: Frame + ) -> None: + self.write(f"{self.choose_async('await ')}environment.get_template(") + self.visit(node.template, frame) + self.write(f", {self.name!r}).") + + if node.with_context: + f_name = f"make_module{self.choose_async('_async')}" + self.write( + f"{f_name}(context.get_all(), True, {self.dump_local_context(frame)})" + ) + else: + self.write(f"_get_default_module{self.choose_async('_async')}(context)") + + def visit_Import(self, node: nodes.Import, frame: Frame) -> None: + """Visit regular imports.""" + self.writeline(f"{frame.symbols.ref(node.target)} = ", node) + if frame.toplevel: + self.write(f"context.vars[{node.target!r}] = ") + + self._import_common(node, frame) + + if frame.toplevel and not node.target.startswith("_"): + self.writeline(f"context.exported_vars.discard({node.target!r})") + + def visit_FromImport(self, node: nodes.FromImport, frame: Frame) -> None: + """Visit named imports.""" + self.newline(node) + self.write("included_template = ") + self._import_common(node, frame) + var_names = [] + discarded_names = [] + for name in node.names: + if isinstance(name, tuple): + name, alias = name + else: + alias = name + self.writeline( + f"{frame.symbols.ref(alias)} =" + f" getattr(included_template, {name!r}, missing)" + ) + self.writeline(f"if {frame.symbols.ref(alias)} is missing:") + self.indent() + # The position will contain the template name, and will be formatted + # into a string that will be compiled into an f-string. Curly braces + # in the name must be replaced with escapes so that they will not be + # executed as part of the f-string. + position = self.position(node).replace("{", "{{").replace("}", "}}") + message = ( + "the template {included_template.__name__!r}" + f" (imported on {position})" + f" does not export the requested name {name!r}" + ) + self.writeline( + f"{frame.symbols.ref(alias)} = undefined(f{message!r}, name={name!r})" + ) + self.outdent() + if frame.toplevel: + var_names.append(alias) + if not alias.startswith("_"): + discarded_names.append(alias) + + if var_names: + if len(var_names) == 1: + name = var_names[0] + self.writeline(f"context.vars[{name!r}] = {frame.symbols.ref(name)}") + else: + names_kv = ", ".join( + f"{name!r}: {frame.symbols.ref(name)}" for name in var_names + ) + self.writeline(f"context.vars.update({{{names_kv}}})") + if discarded_names: + if len(discarded_names) == 1: + self.writeline(f"context.exported_vars.discard({discarded_names[0]!r})") + else: + names_str = ", ".join(map(repr, discarded_names)) + self.writeline( + f"context.exported_vars.difference_update(({names_str}))" + ) + + def visit_For(self, node: nodes.For, frame: Frame) -> None: + loop_frame = frame.inner() + loop_frame.loop_frame = True + test_frame = frame.inner() + else_frame = frame.inner() + + # try to figure out if we have an extended loop. An extended loop + # is necessary if the loop is in recursive mode if the special loop + # variable is accessed in the body if the body is a scoped block. + extended_loop = ( + node.recursive + or "loop" + in find_undeclared(node.iter_child_nodes(only=("body",)), ("loop",)) + or any(block.scoped for block in node.find_all(nodes.Block)) + ) + + loop_ref = None + if extended_loop: + loop_ref = loop_frame.symbols.declare_parameter("loop") + + loop_frame.symbols.analyze_node(node, for_branch="body") + if node.else_: + else_frame.symbols.analyze_node(node, for_branch="else") + + if node.test: + loop_filter_func = self.temporary_identifier() + test_frame.symbols.analyze_node(node, for_branch="test") + self.writeline(f"{self.func(loop_filter_func)}(fiter):", node.test) + self.indent() + self.enter_frame(test_frame) + self.writeline(self.choose_async("async for ", "for ")) + self.visit(node.target, loop_frame) + self.write(" in ") + self.write(self.choose_async("auto_aiter(fiter)", "fiter")) + self.write(":") + self.indent() + self.writeline("if ", node.test) + self.visit(node.test, test_frame) + self.write(":") + self.indent() + self.writeline("yield ") + self.visit(node.target, loop_frame) + self.outdent(3) + self.leave_frame(test_frame, with_python_scope=True) + + # if we don't have an recursive loop we have to find the shadowed + # variables at that point. Because loops can be nested but the loop + # variable is a special one we have to enforce aliasing for it. + if node.recursive: + self.writeline( + f"{self.func('loop')}(reciter, loop_render_func, depth=0):", node + ) + self.indent() + self.buffer(loop_frame) + + # Use the same buffer for the else frame + else_frame.buffer = loop_frame.buffer + + # make sure the loop variable is a special one and raise a template + # assertion error if a loop tries to write to loop + if extended_loop: + self.writeline(f"{loop_ref} = missing") + + for name in node.find_all(nodes.Name): + if name.ctx == "store" and name.name == "loop": + self.fail( + "Can't assign to special loop variable in for-loop target", + name.lineno, + ) + + if node.else_: + iteration_indicator = self.temporary_identifier() + self.writeline(f"{iteration_indicator} = 1") + + self.writeline(self.choose_async("async for ", "for "), node) + self.visit(node.target, loop_frame) + if extended_loop: + self.write(f", {loop_ref} in {self.choose_async('Async')}LoopContext(") + else: + self.write(" in ") + + if node.test: + self.write(f"{loop_filter_func}(") + if node.recursive: + self.write("reciter") + else: + if self.environment.is_async and not extended_loop: + self.write("auto_aiter(") + self.visit(node.iter, frame) + if self.environment.is_async and not extended_loop: + self.write(")") + if node.test: + self.write(")") + + if node.recursive: + self.write(", undefined, loop_render_func, depth):") + else: + self.write(", undefined):" if extended_loop else ":") + + self.indent() + self.enter_frame(loop_frame) + + self.writeline("_loop_vars = {}") + self.blockvisit(node.body, loop_frame) + if node.else_: + self.writeline(f"{iteration_indicator} = 0") + self.outdent() + self.leave_frame( + loop_frame, with_python_scope=node.recursive and not node.else_ + ) + + if node.else_: + self.writeline(f"if {iteration_indicator}:") + self.indent() + self.enter_frame(else_frame) + self.blockvisit(node.else_, else_frame) + self.leave_frame(else_frame) + self.outdent() + + # if the node was recursive we have to return the buffer contents + # and start the iteration code + if node.recursive: + self.return_buffer_contents(loop_frame) + self.outdent() + self.start_write(frame, node) + self.write(f"{self.choose_async('await ')}loop(") + if self.environment.is_async: + self.write("auto_aiter(") + self.visit(node.iter, frame) + if self.environment.is_async: + self.write(")") + self.write(", loop)") + self.end_write(frame) + + # at the end of the iteration, clear any assignments made in the + # loop from the top level + if self._assign_stack: + self._assign_stack[-1].difference_update(loop_frame.symbols.stores) + + def visit_If(self, node: nodes.If, frame: Frame) -> None: + if_frame = frame.soft() + self.writeline("if ", node) + self.visit(node.test, if_frame) + self.write(":") + self.indent() + self.blockvisit(node.body, if_frame) + self.outdent() + for elif_ in node.elif_: + self.writeline("elif ", elif_) + self.visit(elif_.test, if_frame) + self.write(":") + self.indent() + self.blockvisit(elif_.body, if_frame) + self.outdent() + if node.else_: + self.writeline("else:") + self.indent() + self.blockvisit(node.else_, if_frame) + self.outdent() + + def visit_Macro(self, node: nodes.Macro, frame: Frame) -> None: + macro_frame, macro_ref = self.macro_body(node, frame) + self.newline() + if frame.toplevel: + if not node.name.startswith("_"): + self.write(f"context.exported_vars.add({node.name!r})") + self.writeline(f"context.vars[{node.name!r}] = ") + self.write(f"{frame.symbols.ref(node.name)} = ") + self.macro_def(macro_ref, macro_frame) + + def visit_CallBlock(self, node: nodes.CallBlock, frame: Frame) -> None: + call_frame, macro_ref = self.macro_body(node, frame) + self.writeline("caller = ") + self.macro_def(macro_ref, call_frame) + self.start_write(frame, node) + self.visit_Call(node.call, frame, forward_caller=True) + self.end_write(frame) + + def visit_FilterBlock(self, node: nodes.FilterBlock, frame: Frame) -> None: + filter_frame = frame.inner() + filter_frame.symbols.analyze_node(node) + self.enter_frame(filter_frame) + self.buffer(filter_frame) + self.blockvisit(node.body, filter_frame) + self.start_write(frame, node) + self.visit_Filter(node.filter, filter_frame) + self.end_write(frame) + self.leave_frame(filter_frame) + + def visit_With(self, node: nodes.With, frame: Frame) -> None: + with_frame = frame.inner() + with_frame.symbols.analyze_node(node) + self.enter_frame(with_frame) + for target, expr in zip(node.targets, node.values): + self.newline() + self.visit(target, with_frame) + self.write(" = ") + self.visit(expr, frame) + self.blockvisit(node.body, with_frame) + self.leave_frame(with_frame) + + def visit_ExprStmt(self, node: nodes.ExprStmt, frame: Frame) -> None: + self.newline(node) + self.visit(node.node, frame) + + class _FinalizeInfo(t.NamedTuple): + const: t.Optional[t.Callable[..., str]] + src: t.Optional[str] + + @staticmethod + def _default_finalize(value: t.Any) -> t.Any: + """The default finalize function if the environment isn't + configured with one. Or, if the environment has one, this is + called on that function's output for constants. + """ + return str(value) + + _finalize: t.Optional[_FinalizeInfo] = None + + def _make_finalize(self) -> _FinalizeInfo: + """Build the finalize function to be used on constants and at + runtime. Cached so it's only created once for all output nodes. + + Returns a ``namedtuple`` with the following attributes: + + ``const`` + A function to finalize constant data at compile time. + + ``src`` + Source code to output around nodes to be evaluated at + runtime. + """ + if self._finalize is not None: + return self._finalize + + finalize: t.Optional[t.Callable[..., t.Any]] + finalize = default = self._default_finalize + src = None + + if self.environment.finalize: + src = "environment.finalize(" + env_finalize = self.environment.finalize + pass_arg = { + _PassArg.context: "context", + _PassArg.eval_context: "context.eval_ctx", + _PassArg.environment: "environment", + }.get( + _PassArg.from_obj(env_finalize) # type: ignore + ) + finalize = None + + if pass_arg is None: + + def finalize(value: t.Any) -> t.Any: # noqa: F811 + return default(env_finalize(value)) + + else: + src = f"{src}{pass_arg}, " + + if pass_arg == "environment": + + def finalize(value: t.Any) -> t.Any: # noqa: F811 + return default(env_finalize(self.environment, value)) + + self._finalize = self._FinalizeInfo(finalize, src) + return self._finalize + + def _output_const_repr(self, group: t.Iterable[t.Any]) -> str: + """Given a group of constant values converted from ``Output`` + child nodes, produce a string to write to the template module + source. + """ + return repr(concat(group)) + + def _output_child_to_const( + self, node: nodes.Expr, frame: Frame, finalize: _FinalizeInfo + ) -> str: + """Try to optimize a child of an ``Output`` node by trying to + convert it to constant, finalized data at compile time. + + If :exc:`Impossible` is raised, the node is not constant and + will be evaluated at runtime. Any other exception will also be + evaluated at runtime for easier debugging. + """ + const = node.as_const(frame.eval_ctx) + + if frame.eval_ctx.autoescape: + const = escape(const) + + # Template data doesn't go through finalize. + if isinstance(node, nodes.TemplateData): + return str(const) + + return finalize.const(const) # type: ignore + + def _output_child_pre( + self, node: nodes.Expr, frame: Frame, finalize: _FinalizeInfo + ) -> None: + """Output extra source code before visiting a child of an + ``Output`` node. + """ + if frame.eval_ctx.volatile: + self.write("(escape if context.eval_ctx.autoescape else str)(") + elif frame.eval_ctx.autoescape: + self.write("escape(") + else: + self.write("str(") + + if finalize.src is not None: + self.write(finalize.src) + + def _output_child_post( + self, node: nodes.Expr, frame: Frame, finalize: _FinalizeInfo + ) -> None: + """Output extra source code after visiting a child of an + ``Output`` node. + """ + self.write(")") + + if finalize.src is not None: + self.write(")") + + def visit_Output(self, node: nodes.Output, frame: Frame) -> None: + # If an extends is active, don't render outside a block. + if frame.require_output_check: + # A top-level extends is known to exist at compile time. + if self.has_known_extends: + return + + self.writeline("if parent_template is None:") + self.indent() + + finalize = self._make_finalize() + body: t.List[t.Union[t.List[t.Any], nodes.Expr]] = [] + + # Evaluate constants at compile time if possible. Each item in + # body will be either a list of static data or a node to be + # evaluated at runtime. + for child in node.nodes: + try: + if not ( + # If the finalize function requires runtime context, + # constants can't be evaluated at compile time. + finalize.const + # Unless it's basic template data that won't be + # finalized anyway. + or isinstance(child, nodes.TemplateData) + ): + raise nodes.Impossible() + + const = self._output_child_to_const(child, frame, finalize) + except (nodes.Impossible, Exception): + # The node was not constant and needs to be evaluated at + # runtime. Or another error was raised, which is easier + # to debug at runtime. + body.append(child) + continue + + if body and isinstance(body[-1], list): + body[-1].append(const) + else: + body.append([const]) + + if frame.buffer is not None: + if len(body) == 1: + self.writeline(f"{frame.buffer}.append(") + else: + self.writeline(f"{frame.buffer}.extend((") + + self.indent() + + for item in body: + if isinstance(item, list): + # A group of constant data to join and output. + val = self._output_const_repr(item) + + if frame.buffer is None: + self.writeline("yield " + val) + else: + self.writeline(val + ",") + else: + if frame.buffer is None: + self.writeline("yield ", item) + else: + self.newline(item) + + # A node to be evaluated at runtime. + self._output_child_pre(item, frame, finalize) + self.visit(item, frame) + self._output_child_post(item, frame, finalize) + + if frame.buffer is not None: + self.write(",") + + if frame.buffer is not None: + self.outdent() + self.writeline(")" if len(body) == 1 else "))") + + if frame.require_output_check: + self.outdent() + + def visit_Assign(self, node: nodes.Assign, frame: Frame) -> None: + self.push_assign_tracking() + + # ``a.b`` is allowed for assignment, and is parsed as an NSRef. However, + # it is only valid if it references a Namespace object. Emit a check for + # that for each ref here, before assignment code is emitted. This can't + # be done in visit_NSRef as the ref could be in the middle of a tuple. + seen_refs: t.Set[str] = set() + + for nsref in node.find_all(nodes.NSRef): + if nsref.name in seen_refs: + # Only emit the check for each reference once, in case the same + # ref is used multiple times in a tuple, `ns.a, ns.b = c, d`. + continue + + seen_refs.add(nsref.name) + ref = frame.symbols.ref(nsref.name) + self.writeline(f"if not isinstance({ref}, Namespace):") + self.indent() + self.writeline( + "raise TemplateRuntimeError" + '("cannot assign attribute on non-namespace object")' + ) + self.outdent() + + self.newline(node) + self.visit(node.target, frame) + self.write(" = ") + self.visit(node.node, frame) + self.pop_assign_tracking(frame) + + def visit_AssignBlock(self, node: nodes.AssignBlock, frame: Frame) -> None: + self.push_assign_tracking() + block_frame = frame.inner() + # This is a special case. Since a set block always captures we + # will disable output checks. This way one can use set blocks + # toplevel even in extended templates. + block_frame.require_output_check = False + block_frame.symbols.analyze_node(node) + self.enter_frame(block_frame) + self.buffer(block_frame) + self.blockvisit(node.body, block_frame) + self.newline(node) + self.visit(node.target, frame) + self.write(" = (Markup if context.eval_ctx.autoescape else identity)(") + if node.filter is not None: + self.visit_Filter(node.filter, block_frame) + else: + self.write(f"concat({block_frame.buffer})") + self.write(")") + self.pop_assign_tracking(frame) + self.leave_frame(block_frame) + + # -- Expression Visitors + + def visit_Name(self, node: nodes.Name, frame: Frame) -> None: + if node.ctx == "store" and ( + frame.toplevel or frame.loop_frame or frame.block_frame + ): + if self._assign_stack: + self._assign_stack[-1].add(node.name) + ref = frame.symbols.ref(node.name) + + # If we are looking up a variable we might have to deal with the + # case where it's undefined. We can skip that case if the load + # instruction indicates a parameter which are always defined. + if node.ctx == "load": + load = frame.symbols.find_load(ref) + if not ( + load is not None + and load[0] == VAR_LOAD_PARAMETER + and not self.parameter_is_undeclared(ref) + ): + self.write( + f"(undefined(name={node.name!r}) if {ref} is missing else {ref})" + ) + return + + self.write(ref) + + def visit_NSRef(self, node: nodes.NSRef, frame: Frame) -> None: + # NSRef is a dotted assignment target a.b=c, but uses a[b]=c internally. + # visit_Assign emits code to validate that each ref is to a Namespace + # object only. That can't be emitted here as the ref could be in the + # middle of a tuple assignment. + ref = frame.symbols.ref(node.name) + self.writeline(f"{ref}[{node.attr!r}]") + + def visit_Const(self, node: nodes.Const, frame: Frame) -> None: + val = node.as_const(frame.eval_ctx) + if isinstance(val, float): + self.write(str(val)) + else: + self.write(repr(val)) + + def visit_TemplateData(self, node: nodes.TemplateData, frame: Frame) -> None: + try: + self.write(repr(node.as_const(frame.eval_ctx))) + except nodes.Impossible: + self.write( + f"(Markup if context.eval_ctx.autoescape else identity)({node.data!r})" + ) + + def visit_Tuple(self, node: nodes.Tuple, frame: Frame) -> None: + self.write("(") + idx = -1 + for idx, item in enumerate(node.items): + if idx: + self.write(", ") + self.visit(item, frame) + self.write(",)" if idx == 0 else ")") + + def visit_List(self, node: nodes.List, frame: Frame) -> None: + self.write("[") + for idx, item in enumerate(node.items): + if idx: + self.write(", ") + self.visit(item, frame) + self.write("]") + + def visit_Dict(self, node: nodes.Dict, frame: Frame) -> None: + self.write("{") + for idx, item in enumerate(node.items): + if idx: + self.write(", ") + self.visit(item.key, frame) + self.write(": ") + self.visit(item.value, frame) + self.write("}") + + visit_Add = _make_binop("+") + visit_Sub = _make_binop("-") + visit_Mul = _make_binop("*") + visit_Div = _make_binop("/") + visit_FloorDiv = _make_binop("//") + visit_Pow = _make_binop("**") + visit_Mod = _make_binop("%") + visit_And = _make_binop("and") + visit_Or = _make_binop("or") + visit_Pos = _make_unop("+") + visit_Neg = _make_unop("-") + visit_Not = _make_unop("not ") + + @optimizeconst + def visit_Concat(self, node: nodes.Concat, frame: Frame) -> None: + if frame.eval_ctx.volatile: + func_name = "(markup_join if context.eval_ctx.volatile else str_join)" + elif frame.eval_ctx.autoescape: + func_name = "markup_join" + else: + func_name = "str_join" + self.write(f"{func_name}((") + for arg in node.nodes: + self.visit(arg, frame) + self.write(", ") + self.write("))") + + @optimizeconst + def visit_Compare(self, node: nodes.Compare, frame: Frame) -> None: + self.write("(") + self.visit(node.expr, frame) + for op in node.ops: + self.visit(op, frame) + self.write(")") + + def visit_Operand(self, node: nodes.Operand, frame: Frame) -> None: + self.write(f" {operators[node.op]} ") + self.visit(node.expr, frame) + + @optimizeconst + def visit_Getattr(self, node: nodes.Getattr, frame: Frame) -> None: + if self.environment.is_async: + self.write("(await auto_await(") + + self.write("environment.getattr(") + self.visit(node.node, frame) + self.write(f", {node.attr!r})") + + if self.environment.is_async: + self.write("))") + + @optimizeconst + def visit_Getitem(self, node: nodes.Getitem, frame: Frame) -> None: + # slices bypass the environment getitem method. + if isinstance(node.arg, nodes.Slice): + self.visit(node.node, frame) + self.write("[") + self.visit(node.arg, frame) + self.write("]") + else: + if self.environment.is_async: + self.write("(await auto_await(") + + self.write("environment.getitem(") + self.visit(node.node, frame) + self.write(", ") + self.visit(node.arg, frame) + self.write(")") + + if self.environment.is_async: + self.write("))") + + def visit_Slice(self, node: nodes.Slice, frame: Frame) -> None: + if node.start is not None: + self.visit(node.start, frame) + self.write(":") + if node.stop is not None: + self.visit(node.stop, frame) + if node.step is not None: + self.write(":") + self.visit(node.step, frame) + + @contextmanager + def _filter_test_common( + self, node: t.Union[nodes.Filter, nodes.Test], frame: Frame, is_filter: bool + ) -> t.Iterator[None]: + if self.environment.is_async: + self.write("(await auto_await(") + + if is_filter: + self.write(f"{self.filters[node.name]}(") + func = self.environment.filters.get(node.name) + else: + self.write(f"{self.tests[node.name]}(") + func = self.environment.tests.get(node.name) + + # When inside an If or CondExpr frame, allow the filter to be + # undefined at compile time and only raise an error if it's + # actually called at runtime. See pull_dependencies. + if func is None and not frame.soft_frame: + type_name = "filter" if is_filter else "test" + self.fail(f"No {type_name} named {node.name!r}.", node.lineno) + + pass_arg = { + _PassArg.context: "context", + _PassArg.eval_context: "context.eval_ctx", + _PassArg.environment: "environment", + }.get( + _PassArg.from_obj(func) # type: ignore + ) + + if pass_arg is not None: + self.write(f"{pass_arg}, ") + + # Back to the visitor function to handle visiting the target of + # the filter or test. + yield + + self.signature(node, frame) + self.write(")") + + if self.environment.is_async: + self.write("))") + + @optimizeconst + def visit_Filter(self, node: nodes.Filter, frame: Frame) -> None: + with self._filter_test_common(node, frame, True): + # if the filter node is None we are inside a filter block + # and want to write to the current buffer + if node.node is not None: + self.visit(node.node, frame) + elif frame.eval_ctx.volatile: + self.write( + f"(Markup(concat({frame.buffer}))" + f" if context.eval_ctx.autoescape else concat({frame.buffer}))" + ) + elif frame.eval_ctx.autoescape: + self.write(f"Markup(concat({frame.buffer}))") + else: + self.write(f"concat({frame.buffer})") + + @optimizeconst + def visit_Test(self, node: nodes.Test, frame: Frame) -> None: + with self._filter_test_common(node, frame, False): + self.visit(node.node, frame) + + @optimizeconst + def visit_CondExpr(self, node: nodes.CondExpr, frame: Frame) -> None: + frame = frame.soft() + + def write_expr2() -> None: + if node.expr2 is not None: + self.visit(node.expr2, frame) + return + + self.write( + f'cond_expr_undefined("the inline if-expression on' + f" {self.position(node)} evaluated to false and no else" + f' section was defined.")' + ) + + self.write("(") + self.visit(node.expr1, frame) + self.write(" if ") + self.visit(node.test, frame) + self.write(" else ") + write_expr2() + self.write(")") + + @optimizeconst + def visit_Call( + self, node: nodes.Call, frame: Frame, forward_caller: bool = False + ) -> None: + if self.environment.is_async: + self.write("(await auto_await(") + if self.environment.sandboxed: + self.write("environment.call(context, ") + else: + self.write("context.call(") + self.visit(node.node, frame) + extra_kwargs = {"caller": "caller"} if forward_caller else None + loop_kwargs = {"_loop_vars": "_loop_vars"} if frame.loop_frame else {} + block_kwargs = {"_block_vars": "_block_vars"} if frame.block_frame else {} + if extra_kwargs: + extra_kwargs.update(loop_kwargs, **block_kwargs) + elif loop_kwargs or block_kwargs: + extra_kwargs = dict(loop_kwargs, **block_kwargs) + self.signature(node, frame, extra_kwargs) + self.write(")") + if self.environment.is_async: + self.write("))") + + def visit_Keyword(self, node: nodes.Keyword, frame: Frame) -> None: + self.write(node.key + "=") + self.visit(node.value, frame) + + # -- Unused nodes for extensions + + def visit_MarkSafe(self, node: nodes.MarkSafe, frame: Frame) -> None: + self.write("Markup(") + self.visit(node.expr, frame) + self.write(")") + + def visit_MarkSafeIfAutoescape( + self, node: nodes.MarkSafeIfAutoescape, frame: Frame + ) -> None: + self.write("(Markup if context.eval_ctx.autoescape else identity)(") + self.visit(node.expr, frame) + self.write(")") + + def visit_EnvironmentAttribute( + self, node: nodes.EnvironmentAttribute, frame: Frame + ) -> None: + self.write("environment." + node.name) + + def visit_ExtensionAttribute( + self, node: nodes.ExtensionAttribute, frame: Frame + ) -> None: + self.write(f"environment.extensions[{node.identifier!r}].{node.name}") + + def visit_ImportedName(self, node: nodes.ImportedName, frame: Frame) -> None: + self.write(self.import_aliases[node.importname]) + + def visit_InternalName(self, node: nodes.InternalName, frame: Frame) -> None: + self.write(node.name) + + def visit_ContextReference( + self, node: nodes.ContextReference, frame: Frame + ) -> None: + self.write("context") + + def visit_DerivedContextReference( + self, node: nodes.DerivedContextReference, frame: Frame + ) -> None: + self.write(self.derive_context(frame)) + + def visit_Continue(self, node: nodes.Continue, frame: Frame) -> None: + self.writeline("continue", node) + + def visit_Break(self, node: nodes.Break, frame: Frame) -> None: + self.writeline("break", node) + + def visit_Scope(self, node: nodes.Scope, frame: Frame) -> None: + scope_frame = frame.inner() + scope_frame.symbols.analyze_node(node) + self.enter_frame(scope_frame) + self.blockvisit(node.body, scope_frame) + self.leave_frame(scope_frame) + + def visit_OverlayScope(self, node: nodes.OverlayScope, frame: Frame) -> None: + ctx = self.temporary_identifier() + self.writeline(f"{ctx} = {self.derive_context(frame)}") + self.writeline(f"{ctx}.vars = ") + self.visit(node.context, frame) + self.push_context_reference(ctx) + + scope_frame = frame.inner(isolated=True) + scope_frame.symbols.analyze_node(node) + self.enter_frame(scope_frame) + self.blockvisit(node.body, scope_frame) + self.leave_frame(scope_frame) + self.pop_context_reference() + + def visit_EvalContextModifier( + self, node: nodes.EvalContextModifier, frame: Frame + ) -> None: + for keyword in node.options: + self.writeline(f"context.eval_ctx.{keyword.key} = ") + self.visit(keyword.value, frame) + try: + val = keyword.value.as_const(frame.eval_ctx) + except nodes.Impossible: + frame.eval_ctx.volatile = True + else: + setattr(frame.eval_ctx, keyword.key, val) + + def visit_ScopedEvalContextModifier( + self, node: nodes.ScopedEvalContextModifier, frame: Frame + ) -> None: + old_ctx_name = self.temporary_identifier() + saved_ctx = frame.eval_ctx.save() + self.writeline(f"{old_ctx_name} = context.eval_ctx.save()") + self.visit_EvalContextModifier(node, frame) + for child in node.body: + self.visit(child, frame) + frame.eval_ctx.revert(saved_ctx) + self.writeline(f"context.eval_ctx.revert({old_ctx_name})") diff --git a/jinja2/constants.py b/jinja2/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..41a1c23b0a7fe134b1f662545876eb65b31b071e --- /dev/null +++ b/jinja2/constants.py @@ -0,0 +1,20 @@ +#: list of lorem ipsum words used by the lipsum() helper function +LOREM_IPSUM_WORDS = """\ +a ac accumsan ad adipiscing aenean aliquam aliquet amet ante aptent arcu at +auctor augue bibendum blandit class commodo condimentum congue consectetuer +consequat conubia convallis cras cubilia cum curabitur curae cursus dapibus +diam dictum dictumst dignissim dis dolor donec dui duis egestas eget eleifend +elementum elit enim erat eros est et etiam eu euismod facilisi facilisis fames +faucibus felis fermentum feugiat fringilla fusce gravida habitant habitasse hac +hendrerit hymenaeos iaculis id imperdiet in inceptos integer interdum ipsum +justo lacinia lacus laoreet lectus leo libero ligula litora lobortis lorem +luctus maecenas magna magnis malesuada massa mattis mauris metus mi molestie +mollis montes morbi mus nam nascetur natoque nec neque netus nibh nisi nisl non +nonummy nostra nulla nullam nunc odio orci ornare parturient pede pellentesque +penatibus per pharetra phasellus placerat platea porta porttitor posuere +potenti praesent pretium primis proin pulvinar purus quam quis quisque rhoncus +ridiculus risus rutrum sagittis sapien scelerisque sed sem semper senectus sit +sociis sociosqu sodales sollicitudin suscipit suspendisse taciti tellus tempor +tempus tincidunt torquent tortor tristique turpis ullamcorper ultrices +ultricies urna ut varius vehicula vel velit venenatis vestibulum vitae vivamus +viverra volutpat vulputate""" diff --git a/jinja2/debug.py b/jinja2/debug.py new file mode 100644 index 0000000000000000000000000000000000000000..eeeeee78b620f5d0745133b4629647973cd7af87 --- /dev/null +++ b/jinja2/debug.py @@ -0,0 +1,191 @@ +import sys +import typing as t +from types import CodeType +from types import TracebackType + +from .exceptions import TemplateSyntaxError +from .utils import internal_code +from .utils import missing + +if t.TYPE_CHECKING: + from .runtime import Context + + +def rewrite_traceback_stack(source: t.Optional[str] = None) -> BaseException: + """Rewrite the current exception to replace any tracebacks from + within compiled template code with tracebacks that look like they + came from the template source. + + This must be called within an ``except`` block. + + :param source: For ``TemplateSyntaxError``, the original source if + known. + :return: The original exception with the rewritten traceback. + """ + _, exc_value, tb = sys.exc_info() + exc_value = t.cast(BaseException, exc_value) + tb = t.cast(TracebackType, tb) + + if isinstance(exc_value, TemplateSyntaxError) and not exc_value.translated: + exc_value.translated = True + exc_value.source = source + # Remove the old traceback, otherwise the frames from the + # compiler still show up. + exc_value.with_traceback(None) + # Outside of runtime, so the frame isn't executing template + # code, but it still needs to point at the template. + tb = fake_traceback( + exc_value, None, exc_value.filename or "", exc_value.lineno + ) + else: + # Skip the frame for the render function. + tb = tb.tb_next + + stack = [] + + # Build the stack of traceback object, replacing any in template + # code with the source file and line information. + while tb is not None: + # Skip frames decorated with @internalcode. These are internal + # calls that aren't useful in template debugging output. + if tb.tb_frame.f_code in internal_code: + tb = tb.tb_next + continue + + template = tb.tb_frame.f_globals.get("__jinja_template__") + + if template is not None: + lineno = template.get_corresponding_lineno(tb.tb_lineno) + fake_tb = fake_traceback(exc_value, tb, template.filename, lineno) + stack.append(fake_tb) + else: + stack.append(tb) + + tb = tb.tb_next + + tb_next = None + + # Assign tb_next in reverse to avoid circular references. + for tb in reversed(stack): + tb.tb_next = tb_next + tb_next = tb + + return exc_value.with_traceback(tb_next) + + +def fake_traceback( # type: ignore + exc_value: BaseException, tb: t.Optional[TracebackType], filename: str, lineno: int +) -> TracebackType: + """Produce a new traceback object that looks like it came from the + template source instead of the compiled code. The filename, line + number, and location name will point to the template, and the local + variables will be the current template context. + + :param exc_value: The original exception to be re-raised to create + the new traceback. + :param tb: The original traceback to get the local variables and + code info from. + :param filename: The template filename. + :param lineno: The line number in the template source. + """ + if tb is not None: + # Replace the real locals with the context that would be + # available at that point in the template. + locals = get_template_locals(tb.tb_frame.f_locals) + locals.pop("__jinja_exception__", None) + else: + locals = {} + + globals = { + "__name__": filename, + "__file__": filename, + "__jinja_exception__": exc_value, + } + # Raise an exception at the correct line number. + code: CodeType = compile( + "\n" * (lineno - 1) + "raise __jinja_exception__", filename, "exec" + ) + + # Build a new code object that points to the template file and + # replaces the location with a block name. + location = "template" + + if tb is not None: + function = tb.tb_frame.f_code.co_name + + if function == "root": + location = "top-level template code" + elif function.startswith("block_"): + location = f"block {function[6:]!r}" + + if sys.version_info >= (3, 8): + code = code.replace(co_name=location) + else: + code = CodeType( + code.co_argcount, + code.co_kwonlyargcount, + code.co_nlocals, + code.co_stacksize, + code.co_flags, + code.co_code, + code.co_consts, + code.co_names, + code.co_varnames, + code.co_filename, + location, + code.co_firstlineno, + code.co_lnotab, + code.co_freevars, + code.co_cellvars, + ) + + # Execute the new code, which is guaranteed to raise, and return + # the new traceback without this frame. + try: + exec(code, globals, locals) + except BaseException: + return sys.exc_info()[2].tb_next # type: ignore + + +def get_template_locals(real_locals: t.Mapping[str, t.Any]) -> t.Dict[str, t.Any]: + """Based on the runtime locals, get the context that would be + available at that point in the template. + """ + # Start with the current template context. + ctx: t.Optional[Context] = real_locals.get("context") + + if ctx is not None: + data: t.Dict[str, t.Any] = ctx.get_all().copy() + else: + data = {} + + # Might be in a derived context that only sets local variables + # rather than pushing a context. Local variables follow the scheme + # l_depth_name. Find the highest-depth local that has a value for + # each name. + local_overrides: t.Dict[str, t.Tuple[int, t.Any]] = {} + + for name, value in real_locals.items(): + if not name.startswith("l_") or value is missing: + # Not a template variable, or no longer relevant. + continue + + try: + _, depth_str, name = name.split("_", 2) + depth = int(depth_str) + except ValueError: + continue + + cur_depth = local_overrides.get(name, (-1,))[0] + + if cur_depth < depth: + local_overrides[name] = (depth, value) + + # Modify the context with any derived context. + for name, (_, value) in local_overrides.items(): + if value is missing: + data.pop(name, None) + else: + data[name] = value + + return data diff --git a/jinja2/defaults.py b/jinja2/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..638cad3d2d8907330bde56e2b76c9b185c523b45 --- /dev/null +++ b/jinja2/defaults.py @@ -0,0 +1,48 @@ +import typing as t + +from .filters import FILTERS as DEFAULT_FILTERS # noqa: F401 +from .tests import TESTS as DEFAULT_TESTS # noqa: F401 +from .utils import Cycler +from .utils import generate_lorem_ipsum +from .utils import Joiner +from .utils import Namespace + +if t.TYPE_CHECKING: + import typing_extensions as te + +# defaults for the parser / lexer +BLOCK_START_STRING = "{%" +BLOCK_END_STRING = "%}" +VARIABLE_START_STRING = "{{" +VARIABLE_END_STRING = "}}" +COMMENT_START_STRING = "{#" +COMMENT_END_STRING = "#}" +LINE_STATEMENT_PREFIX: t.Optional[str] = None +LINE_COMMENT_PREFIX: t.Optional[str] = None +TRIM_BLOCKS = False +LSTRIP_BLOCKS = False +NEWLINE_SEQUENCE: "te.Literal['\\n', '\\r\\n', '\\r']" = "\n" +KEEP_TRAILING_NEWLINE = False + +# default filters, tests and namespace + +DEFAULT_NAMESPACE = { + "range": range, + "dict": dict, + "lipsum": generate_lorem_ipsum, + "cycler": Cycler, + "joiner": Joiner, + "namespace": Namespace, +} + +# default policies +DEFAULT_POLICIES: t.Dict[str, t.Any] = { + "compiler.ascii_str": True, + "urlize.rel": "noopener", + "urlize.target": None, + "urlize.extra_schemes": None, + "truncate.leeway": 5, + "json.dumps_function": None, + "json.dumps_kwargs": {"sort_keys": True}, + "ext.i18n.trimmed": False, +} diff --git a/jinja2/environment.py b/jinja2/environment.py new file mode 100644 index 0000000000000000000000000000000000000000..0fc6e5be87ab8273f6056ddfede07e1be28f1495 --- /dev/null +++ b/jinja2/environment.py @@ -0,0 +1,1672 @@ +"""Classes for managing templates and their runtime and compile time +options. +""" + +import os +import typing +import typing as t +import weakref +from collections import ChainMap +from functools import lru_cache +from functools import partial +from functools import reduce +from types import CodeType + +from markupsafe import Markup + +from . import nodes +from .compiler import CodeGenerator +from .compiler import generate +from .defaults import BLOCK_END_STRING +from .defaults import BLOCK_START_STRING +from .defaults import COMMENT_END_STRING +from .defaults import COMMENT_START_STRING +from .defaults import DEFAULT_FILTERS # type: ignore[attr-defined] +from .defaults import DEFAULT_NAMESPACE +from .defaults import DEFAULT_POLICIES +from .defaults import DEFAULT_TESTS # type: ignore[attr-defined] +from .defaults import KEEP_TRAILING_NEWLINE +from .defaults import LINE_COMMENT_PREFIX +from .defaults import LINE_STATEMENT_PREFIX +from .defaults import LSTRIP_BLOCKS +from .defaults import NEWLINE_SEQUENCE +from .defaults import TRIM_BLOCKS +from .defaults import VARIABLE_END_STRING +from .defaults import VARIABLE_START_STRING +from .exceptions import TemplateNotFound +from .exceptions import TemplateRuntimeError +from .exceptions import TemplatesNotFound +from .exceptions import TemplateSyntaxError +from .exceptions import UndefinedError +from .lexer import get_lexer +from .lexer import Lexer +from .lexer import TokenStream +from .nodes import EvalContext +from .parser import Parser +from .runtime import Context +from .runtime import new_context +from .runtime import Undefined +from .utils import _PassArg +from .utils import concat +from .utils import consume +from .utils import import_string +from .utils import internalcode +from .utils import LRUCache +from .utils import missing + +if t.TYPE_CHECKING: + import typing_extensions as te + + from .bccache import BytecodeCache + from .ext import Extension + from .loaders import BaseLoader + +_env_bound = t.TypeVar("_env_bound", bound="Environment") + + +# for direct template usage we have up to ten living environments +@lru_cache(maxsize=10) +def get_spontaneous_environment(cls: t.Type[_env_bound], *args: t.Any) -> _env_bound: + """Return a new spontaneous environment. A spontaneous environment + is used for templates created directly rather than through an + existing environment. + + :param cls: Environment class to create. + :param args: Positional arguments passed to environment. + """ + env = cls(*args) + env.shared = True + return env + + +def create_cache( + size: int, +) -> t.Optional[t.MutableMapping[t.Tuple["weakref.ref[t.Any]", str], "Template"]]: + """Return the cache class for the given size.""" + if size == 0: + return None + + if size < 0: + return {} + + return LRUCache(size) # type: ignore + + +def copy_cache( + cache: t.Optional[t.MutableMapping[t.Any, t.Any]], +) -> t.Optional[t.MutableMapping[t.Tuple["weakref.ref[t.Any]", str], "Template"]]: + """Create an empty copy of the given cache.""" + if cache is None: + return None + + if type(cache) is dict: # noqa E721 + return {} + + return LRUCache(cache.capacity) # type: ignore + + +def load_extensions( + environment: "Environment", + extensions: t.Sequence[t.Union[str, t.Type["Extension"]]], +) -> t.Dict[str, "Extension"]: + """Load the extensions from the list and bind it to the environment. + Returns a dict of instantiated extensions. + """ + result = {} + + for extension in extensions: + if isinstance(extension, str): + extension = t.cast(t.Type["Extension"], import_string(extension)) + + result[extension.identifier] = extension(environment) + + return result + + +def _environment_config_check(environment: _env_bound) -> _env_bound: + """Perform a sanity check on the environment.""" + assert issubclass( + environment.undefined, Undefined + ), "'undefined' must be a subclass of 'jinja2.Undefined'." + assert ( + environment.block_start_string + != environment.variable_start_string + != environment.comment_start_string + ), "block, variable and comment start strings must be different." + assert environment.newline_sequence in { + "\r", + "\r\n", + "\n", + }, "'newline_sequence' must be one of '\\n', '\\r\\n', or '\\r'." + return environment + + +class Environment: + r"""The core component of Jinja is the `Environment`. It contains + important shared variables like configuration, filters, tests, + globals and others. Instances of this class may be modified if + they are not shared and if no template was loaded so far. + Modifications on environments after the first template was loaded + will lead to surprising effects and undefined behavior. + + Here are the possible initialization parameters: + + `block_start_string` + The string marking the beginning of a block. Defaults to ``'{%'``. + + `block_end_string` + The string marking the end of a block. Defaults to ``'%}'``. + + `variable_start_string` + The string marking the beginning of a print statement. + Defaults to ``'{{'``. + + `variable_end_string` + The string marking the end of a print statement. Defaults to + ``'}}'``. + + `comment_start_string` + The string marking the beginning of a comment. Defaults to ``'{#'``. + + `comment_end_string` + The string marking the end of a comment. Defaults to ``'#}'``. + + `line_statement_prefix` + If given and a string, this will be used as prefix for line based + statements. See also :ref:`line-statements`. + + `line_comment_prefix` + If given and a string, this will be used as prefix for line based + comments. See also :ref:`line-statements`. + + .. versionadded:: 2.2 + + `trim_blocks` + If this is set to ``True`` the first newline after a block is + removed (block, not variable tag!). Defaults to `False`. + + `lstrip_blocks` + If this is set to ``True`` leading spaces and tabs are stripped + from the start of a line to a block. Defaults to `False`. + + `newline_sequence` + The sequence that starts a newline. Must be one of ``'\r'``, + ``'\n'`` or ``'\r\n'``. The default is ``'\n'`` which is a + useful default for Linux and OS X systems as well as web + applications. + + `keep_trailing_newline` + Preserve the trailing newline when rendering templates. + The default is ``False``, which causes a single newline, + if present, to be stripped from the end of the template. + + .. versionadded:: 2.7 + + `extensions` + List of Jinja extensions to use. This can either be import paths + as strings or extension classes. For more information have a + look at :ref:`the extensions documentation `. + + `optimized` + should the optimizer be enabled? Default is ``True``. + + `undefined` + :class:`Undefined` or a subclass of it that is used to represent + undefined values in the template. + + `finalize` + A callable that can be used to process the result of a variable + expression before it is output. For example one can convert + ``None`` implicitly into an empty string here. + + `autoescape` + If set to ``True`` the XML/HTML autoescaping feature is enabled by + default. For more details about autoescaping see + :class:`~markupsafe.Markup`. As of Jinja 2.4 this can also + be a callable that is passed the template name and has to + return ``True`` or ``False`` depending on autoescape should be + enabled by default. + + .. versionchanged:: 2.4 + `autoescape` can now be a function + + `loader` + The template loader for this environment. + + `cache_size` + The size of the cache. Per default this is ``400`` which means + that if more than 400 templates are loaded the loader will clean + out the least recently used template. If the cache size is set to + ``0`` templates are recompiled all the time, if the cache size is + ``-1`` the cache will not be cleaned. + + .. versionchanged:: 2.8 + The cache size was increased to 400 from a low 50. + + `auto_reload` + Some loaders load templates from locations where the template + sources may change (ie: file system or database). If + ``auto_reload`` is set to ``True`` (default) every time a template is + requested the loader checks if the source changed and if yes, it + will reload the template. For higher performance it's possible to + disable that. + + `bytecode_cache` + If set to a bytecode cache object, this object will provide a + cache for the internal Jinja bytecode so that templates don't + have to be parsed if they were not changed. + + See :ref:`bytecode-cache` for more information. + + `enable_async` + If set to true this enables async template execution which + allows using async functions and generators. + """ + + #: if this environment is sandboxed. Modifying this variable won't make + #: the environment sandboxed though. For a real sandboxed environment + #: have a look at jinja2.sandbox. This flag alone controls the code + #: generation by the compiler. + sandboxed = False + + #: True if the environment is just an overlay + overlayed = False + + #: the environment this environment is linked to if it is an overlay + linked_to: t.Optional["Environment"] = None + + #: shared environments have this set to `True`. A shared environment + #: must not be modified + shared = False + + #: the class that is used for code generation. See + #: :class:`~jinja2.compiler.CodeGenerator` for more information. + code_generator_class: t.Type["CodeGenerator"] = CodeGenerator + + concat = "".join + + #: the context class that is used for templates. See + #: :class:`~jinja2.runtime.Context` for more information. + context_class: t.Type[Context] = Context + + template_class: t.Type["Template"] + + def __init__( + self, + block_start_string: str = BLOCK_START_STRING, + block_end_string: str = BLOCK_END_STRING, + variable_start_string: str = VARIABLE_START_STRING, + variable_end_string: str = VARIABLE_END_STRING, + comment_start_string: str = COMMENT_START_STRING, + comment_end_string: str = COMMENT_END_STRING, + line_statement_prefix: t.Optional[str] = LINE_STATEMENT_PREFIX, + line_comment_prefix: t.Optional[str] = LINE_COMMENT_PREFIX, + trim_blocks: bool = TRIM_BLOCKS, + lstrip_blocks: bool = LSTRIP_BLOCKS, + newline_sequence: "te.Literal['\\n', '\\r\\n', '\\r']" = NEWLINE_SEQUENCE, + keep_trailing_newline: bool = KEEP_TRAILING_NEWLINE, + extensions: t.Sequence[t.Union[str, t.Type["Extension"]]] = (), + optimized: bool = True, + undefined: t.Type[Undefined] = Undefined, + finalize: t.Optional[t.Callable[..., t.Any]] = None, + autoescape: t.Union[bool, t.Callable[[t.Optional[str]], bool]] = False, + loader: t.Optional["BaseLoader"] = None, + cache_size: int = 400, + auto_reload: bool = True, + bytecode_cache: t.Optional["BytecodeCache"] = None, + enable_async: bool = False, + ): + # !!Important notice!! + # The constructor accepts quite a few arguments that should be + # passed by keyword rather than position. However it's important to + # not change the order of arguments because it's used at least + # internally in those cases: + # - spontaneous environments (i18n extension and Template) + # - unittests + # If parameter changes are required only add parameters at the end + # and don't change the arguments (or the defaults!) of the arguments + # existing already. + + # lexer / parser information + self.block_start_string = block_start_string + self.block_end_string = block_end_string + self.variable_start_string = variable_start_string + self.variable_end_string = variable_end_string + self.comment_start_string = comment_start_string + self.comment_end_string = comment_end_string + self.line_statement_prefix = line_statement_prefix + self.line_comment_prefix = line_comment_prefix + self.trim_blocks = trim_blocks + self.lstrip_blocks = lstrip_blocks + self.newline_sequence = newline_sequence + self.keep_trailing_newline = keep_trailing_newline + + # runtime information + self.undefined: t.Type[Undefined] = undefined + self.optimized = optimized + self.finalize = finalize + self.autoescape = autoescape + + # defaults + self.filters = DEFAULT_FILTERS.copy() + self.tests = DEFAULT_TESTS.copy() + self.globals = DEFAULT_NAMESPACE.copy() + + # set the loader provided + self.loader = loader + self.cache = create_cache(cache_size) + self.bytecode_cache = bytecode_cache + self.auto_reload = auto_reload + + # configurable policies + self.policies = DEFAULT_POLICIES.copy() + + # load extensions + self.extensions = load_extensions(self, extensions) + + self.is_async = enable_async + _environment_config_check(self) + + def add_extension(self, extension: t.Union[str, t.Type["Extension"]]) -> None: + """Adds an extension after the environment was created. + + .. versionadded:: 2.5 + """ + self.extensions.update(load_extensions(self, [extension])) + + def extend(self, **attributes: t.Any) -> None: + """Add the items to the instance of the environment if they do not exist + yet. This is used by :ref:`extensions ` to register + callbacks and configuration values without breaking inheritance. + """ + for key, value in attributes.items(): + if not hasattr(self, key): + setattr(self, key, value) + + def overlay( + self, + block_start_string: str = missing, + block_end_string: str = missing, + variable_start_string: str = missing, + variable_end_string: str = missing, + comment_start_string: str = missing, + comment_end_string: str = missing, + line_statement_prefix: t.Optional[str] = missing, + line_comment_prefix: t.Optional[str] = missing, + trim_blocks: bool = missing, + lstrip_blocks: bool = missing, + newline_sequence: "te.Literal['\\n', '\\r\\n', '\\r']" = missing, + keep_trailing_newline: bool = missing, + extensions: t.Sequence[t.Union[str, t.Type["Extension"]]] = missing, + optimized: bool = missing, + undefined: t.Type[Undefined] = missing, + finalize: t.Optional[t.Callable[..., t.Any]] = missing, + autoescape: t.Union[bool, t.Callable[[t.Optional[str]], bool]] = missing, + loader: t.Optional["BaseLoader"] = missing, + cache_size: int = missing, + auto_reload: bool = missing, + bytecode_cache: t.Optional["BytecodeCache"] = missing, + enable_async: bool = missing, + ) -> "te.Self": + """Create a new overlay environment that shares all the data with the + current environment except for cache and the overridden attributes. + Extensions cannot be removed for an overlayed environment. An overlayed + environment automatically gets all the extensions of the environment it + is linked to plus optional extra extensions. + + Creating overlays should happen after the initial environment was set + up completely. Not all attributes are truly linked, some are just + copied over so modifications on the original environment may not shine + through. + + .. versionchanged:: 3.1.5 + ``enable_async`` is applied correctly. + + .. versionchanged:: 3.1.2 + Added the ``newline_sequence``, ``keep_trailing_newline``, + and ``enable_async`` parameters to match ``__init__``. + """ + args = dict(locals()) + del args["self"], args["cache_size"], args["extensions"], args["enable_async"] + + rv = object.__new__(self.__class__) + rv.__dict__.update(self.__dict__) + rv.overlayed = True + rv.linked_to = self + + for key, value in args.items(): + if value is not missing: + setattr(rv, key, value) + + if cache_size is not missing: + rv.cache = create_cache(cache_size) + else: + rv.cache = copy_cache(self.cache) + + rv.extensions = {} + for key, value in self.extensions.items(): + rv.extensions[key] = value.bind(rv) + if extensions is not missing: + rv.extensions.update(load_extensions(rv, extensions)) + + if enable_async is not missing: + rv.is_async = enable_async + + return _environment_config_check(rv) + + @property + def lexer(self) -> Lexer: + """The lexer for this environment.""" + return get_lexer(self) + + def iter_extensions(self) -> t.Iterator["Extension"]: + """Iterates over the extensions by priority.""" + return iter(sorted(self.extensions.values(), key=lambda x: x.priority)) + + def getitem( + self, obj: t.Any, argument: t.Union[str, t.Any] + ) -> t.Union[t.Any, Undefined]: + """Get an item or attribute of an object but prefer the item.""" + try: + return obj[argument] + except (AttributeError, TypeError, LookupError): + if isinstance(argument, str): + try: + attr = str(argument) + except Exception: + pass + else: + try: + return getattr(obj, attr) + except AttributeError: + pass + return self.undefined(obj=obj, name=argument) + + def getattr(self, obj: t.Any, attribute: str) -> t.Any: + """Get an item or attribute of an object but prefer the attribute. + Unlike :meth:`getitem` the attribute *must* be a string. + """ + try: + return getattr(obj, attribute) + except AttributeError: + pass + try: + return obj[attribute] + except (TypeError, LookupError, AttributeError): + return self.undefined(obj=obj, name=attribute) + + def _filter_test_common( + self, + name: t.Union[str, Undefined], + value: t.Any, + args: t.Optional[t.Sequence[t.Any]], + kwargs: t.Optional[t.Mapping[str, t.Any]], + context: t.Optional[Context], + eval_ctx: t.Optional[EvalContext], + is_filter: bool, + ) -> t.Any: + if is_filter: + env_map = self.filters + type_name = "filter" + else: + env_map = self.tests + type_name = "test" + + func = env_map.get(name) # type: ignore + + if func is None: + msg = f"No {type_name} named {name!r}." + + if isinstance(name, Undefined): + try: + name._fail_with_undefined_error() + except Exception as e: + msg = f"{msg} ({e}; did you forget to quote the callable name?)" + + raise TemplateRuntimeError(msg) + + args = [value, *(args if args is not None else ())] + kwargs = kwargs if kwargs is not None else {} + pass_arg = _PassArg.from_obj(func) + + if pass_arg is _PassArg.context: + if context is None: + raise TemplateRuntimeError( + f"Attempted to invoke a context {type_name} without context." + ) + + args.insert(0, context) + elif pass_arg is _PassArg.eval_context: + if eval_ctx is None: + if context is not None: + eval_ctx = context.eval_ctx + else: + eval_ctx = EvalContext(self) + + args.insert(0, eval_ctx) + elif pass_arg is _PassArg.environment: + args.insert(0, self) + + return func(*args, **kwargs) + + def call_filter( + self, + name: str, + value: t.Any, + args: t.Optional[t.Sequence[t.Any]] = None, + kwargs: t.Optional[t.Mapping[str, t.Any]] = None, + context: t.Optional[Context] = None, + eval_ctx: t.Optional[EvalContext] = None, + ) -> t.Any: + """Invoke a filter on a value the same way the compiler does. + + This might return a coroutine if the filter is running from an + environment in async mode and the filter supports async + execution. It's your responsibility to await this if needed. + + .. versionadded:: 2.7 + """ + return self._filter_test_common( + name, value, args, kwargs, context, eval_ctx, True + ) + + def call_test( + self, + name: str, + value: t.Any, + args: t.Optional[t.Sequence[t.Any]] = None, + kwargs: t.Optional[t.Mapping[str, t.Any]] = None, + context: t.Optional[Context] = None, + eval_ctx: t.Optional[EvalContext] = None, + ) -> t.Any: + """Invoke a test on a value the same way the compiler does. + + This might return a coroutine if the test is running from an + environment in async mode and the test supports async execution. + It's your responsibility to await this if needed. + + .. versionchanged:: 3.0 + Tests support ``@pass_context``, etc. decorators. Added + the ``context`` and ``eval_ctx`` parameters. + + .. versionadded:: 2.7 + """ + return self._filter_test_common( + name, value, args, kwargs, context, eval_ctx, False + ) + + @internalcode + def parse( + self, + source: str, + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + ) -> nodes.Template: + """Parse the sourcecode and return the abstract syntax tree. This + tree of nodes is used by the compiler to convert the template into + executable source- or bytecode. This is useful for debugging or to + extract information from templates. + + If you are :ref:`developing Jinja extensions ` + this gives you a good overview of the node tree generated. + """ + try: + return self._parse(source, name, filename) + except TemplateSyntaxError: + self.handle_exception(source=source) + + def _parse( + self, source: str, name: t.Optional[str], filename: t.Optional[str] + ) -> nodes.Template: + """Internal parsing function used by `parse` and `compile`.""" + return Parser(self, source, name, filename).parse() + + def lex( + self, + source: str, + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + ) -> t.Iterator[t.Tuple[int, str, str]]: + """Lex the given sourcecode and return a generator that yields + tokens as tuples in the form ``(lineno, token_type, value)``. + This can be useful for :ref:`extension development ` + and debugging templates. + + This does not perform preprocessing. If you want the preprocessing + of the extensions to be applied you have to filter source through + the :meth:`preprocess` method. + """ + source = str(source) + try: + return self.lexer.tokeniter(source, name, filename) + except TemplateSyntaxError: + self.handle_exception(source=source) + + def preprocess( + self, + source: str, + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + ) -> str: + """Preprocesses the source with all extensions. This is automatically + called for all parsing and compiling methods but *not* for :meth:`lex` + because there you usually only want the actual source tokenized. + """ + return reduce( + lambda s, e: e.preprocess(s, name, filename), + self.iter_extensions(), + str(source), + ) + + def _tokenize( + self, + source: str, + name: t.Optional[str], + filename: t.Optional[str] = None, + state: t.Optional[str] = None, + ) -> TokenStream: + """Called by the parser to do the preprocessing and filtering + for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`. + """ + source = self.preprocess(source, name, filename) + stream = self.lexer.tokenize(source, name, filename, state) + + for ext in self.iter_extensions(): + stream = ext.filter_stream(stream) # type: ignore + + if not isinstance(stream, TokenStream): + stream = TokenStream(stream, name, filename) + + return stream + + def _generate( + self, + source: nodes.Template, + name: t.Optional[str], + filename: t.Optional[str], + defer_init: bool = False, + ) -> str: + """Internal hook that can be overridden to hook a different generate + method in. + + .. versionadded:: 2.5 + """ + return generate( # type: ignore + source, + self, + name, + filename, + defer_init=defer_init, + optimized=self.optimized, + ) + + def _compile(self, source: str, filename: str) -> CodeType: + """Internal hook that can be overridden to hook a different compile + method in. + + .. versionadded:: 2.5 + """ + return compile(source, filename, "exec") + + @typing.overload + def compile( + self, + source: t.Union[str, nodes.Template], + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + raw: "te.Literal[False]" = False, + defer_init: bool = False, + ) -> CodeType: ... + + @typing.overload + def compile( + self, + source: t.Union[str, nodes.Template], + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + raw: "te.Literal[True]" = ..., + defer_init: bool = False, + ) -> str: ... + + @internalcode + def compile( + self, + source: t.Union[str, nodes.Template], + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + raw: bool = False, + defer_init: bool = False, + ) -> t.Union[str, CodeType]: + """Compile a node or template source code. The `name` parameter is + the load name of the template after it was joined using + :meth:`join_path` if necessary, not the filename on the file system. + the `filename` parameter is the estimated filename of the template on + the file system. If the template came from a database or memory this + can be omitted. + + The return value of this method is a python code object. If the `raw` + parameter is `True` the return value will be a string with python + code equivalent to the bytecode returned otherwise. This method is + mainly used internally. + + `defer_init` is use internally to aid the module code generator. This + causes the generated code to be able to import without the global + environment variable to be set. + + .. versionadded:: 2.4 + `defer_init` parameter added. + """ + source_hint = None + try: + if isinstance(source, str): + source_hint = source + source = self._parse(source, name, filename) + source = self._generate(source, name, filename, defer_init=defer_init) + if raw: + return source + if filename is None: + filename = "