Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/INSTALLER +1 -0
- venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/METADATA +159 -0
- venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/RECORD +18 -0
- venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/WHEEL +4 -0
- venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/licenses/LICENSE +73 -0
- venv/lib/python3.10/site-packages/exceptiongroup/__pycache__/_exceptions.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/exceptiongroup/__pycache__/_formatting.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/exceptiongroup/__pycache__/_suppress.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/exceptiongroup/__pycache__/_version.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/INSTALLER +1 -0
- venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/METADATA +38 -0
- venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/RECORD +26 -0
- venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/WHEEL +4 -0
- venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/licenses/LICENSE +21 -0
- venv/lib/python3.10/site-packages/filelock/__init__.py +77 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/__init__.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/_api.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/_error.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/_read_write.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/_soft.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/_unix.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/_util.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/_windows.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/asyncio.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/__pycache__/version.cpython-310.pyc +0 -0
- venv/lib/python3.10/site-packages/filelock/_api.py +578 -0
- venv/lib/python3.10/site-packages/filelock/_error.py +30 -0
- venv/lib/python3.10/site-packages/filelock/_read_write.py +363 -0
- venv/lib/python3.10/site-packages/filelock/_soft.py +127 -0
- venv/lib/python3.10/site-packages/filelock/_unix.py +109 -0
- venv/lib/python3.10/site-packages/filelock/_util.py +53 -0
- venv/lib/python3.10/site-packages/filelock/_windows.py +100 -0
- venv/lib/python3.10/site-packages/filelock/asyncio.py +376 -0
- venv/lib/python3.10/site-packages/filelock/py.typed +0 -0
- venv/lib/python3.10/site-packages/filelock/version.py +34 -0
- venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/INSTALLER +1 -0
- venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/METADATA +257 -0
- venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/RECORD +119 -0
- venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/WHEEL +4 -0
- venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/licenses/LICENSE +29 -0
- venv/lib/python3.10/site-packages/fsspec/__init__.py +71 -0
- venv/lib/python3.10/site-packages/fsspec/_version.py +34 -0
- venv/lib/python3.10/site-packages/fsspec/archive.py +75 -0
- venv/lib/python3.10/site-packages/fsspec/asyn.py +1103 -0
- venv/lib/python3.10/site-packages/fsspec/caching.py +1004 -0
- venv/lib/python3.10/site-packages/fsspec/callbacks.py +324 -0
- venv/lib/python3.10/site-packages/fsspec/compression.py +185 -0
- venv/lib/python3.10/site-packages/fsspec/config.py +131 -0
- venv/lib/python3.10/site-packages/fsspec/conftest.py +125 -0
- venv/lib/python3.10/site-packages/fsspec/core.py +760 -0
venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/METADATA
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: exceptiongroup
|
| 3 |
+
Version: 1.3.1
|
| 4 |
+
Summary: Backport of PEP 654 (exception groups)
|
| 5 |
+
Author-email: Alex Grönholm <alex.gronholm@nextday.fi>
|
| 6 |
+
Requires-Python: >=3.7
|
| 7 |
+
Description-Content-Type: text/x-rst
|
| 8 |
+
Classifier: Development Status :: 5 - Production/Stable
|
| 9 |
+
Classifier: Intended Audience :: Developers
|
| 10 |
+
Classifier: License :: OSI Approved :: MIT License
|
| 11 |
+
Classifier: Programming Language :: Python
|
| 12 |
+
Classifier: Programming Language :: Python :: 3 :: Only
|
| 13 |
+
Classifier: Typing :: Typed
|
| 14 |
+
License-File: LICENSE
|
| 15 |
+
Requires-Dist: typing-extensions >= 4.6.0; python_version < '3.13'
|
| 16 |
+
Requires-Dist: pytest >= 6 ; extra == "test"
|
| 17 |
+
Project-URL: Changelog, https://github.com/agronholm/exceptiongroup/blob/main/CHANGES.rst
|
| 18 |
+
Project-URL: Issue Tracker, https://github.com/agronholm/exceptiongroup/issues
|
| 19 |
+
Project-URL: Source code, https://github.com/agronholm/exceptiongroup
|
| 20 |
+
Provides-Extra: test
|
| 21 |
+
|
| 22 |
+
.. image:: https://github.com/agronholm/exceptiongroup/actions/workflows/test.yml/badge.svg
|
| 23 |
+
:target: https://github.com/agronholm/exceptiongroup/actions/workflows/test.yml
|
| 24 |
+
:alt: Build Status
|
| 25 |
+
.. image:: https://coveralls.io/repos/github/agronholm/exceptiongroup/badge.svg?branch=main
|
| 26 |
+
:target: https://coveralls.io/github/agronholm/exceptiongroup?branch=main
|
| 27 |
+
:alt: Code Coverage
|
| 28 |
+
|
| 29 |
+
This is a backport of the ``BaseExceptionGroup`` and ``ExceptionGroup`` classes from
|
| 30 |
+
Python 3.11.
|
| 31 |
+
|
| 32 |
+
It contains the following:
|
| 33 |
+
|
| 34 |
+
* The ``exceptiongroup.BaseExceptionGroup`` and ``exceptiongroup.ExceptionGroup``
|
| 35 |
+
classes
|
| 36 |
+
* A utility function (``exceptiongroup.catch()``) for catching exceptions possibly
|
| 37 |
+
nested in an exception group
|
| 38 |
+
* Patches to the ``TracebackException`` class that properly formats exception groups
|
| 39 |
+
(installed on import)
|
| 40 |
+
* An exception hook that handles formatting of exception groups through
|
| 41 |
+
``TracebackException`` (installed on import)
|
| 42 |
+
* Special versions of some of the functions from the ``traceback`` module, modified to
|
| 43 |
+
correctly handle exception groups even when monkey patching is disabled, or blocked by
|
| 44 |
+
another custom exception hook:
|
| 45 |
+
|
| 46 |
+
* ``traceback.format_exception()``
|
| 47 |
+
* ``traceback.format_exception_only()``
|
| 48 |
+
* ``traceback.print_exception()``
|
| 49 |
+
* ``traceback.print_exc()``
|
| 50 |
+
* A backported version of ``contextlib.suppress()`` from Python 3.12.1 which also
|
| 51 |
+
handles suppressing exceptions inside exception groups
|
| 52 |
+
|
| 53 |
+
If this package is imported on Python 3.11 or later, the built-in implementations of the
|
| 54 |
+
exception group classes are used instead, ``TracebackException`` is not monkey patched
|
| 55 |
+
and the exception hook won't be installed.
|
| 56 |
+
|
| 57 |
+
See the `standard library documentation`_ for more information on exception groups.
|
| 58 |
+
|
| 59 |
+
.. _standard library documentation: https://docs.python.org/3/library/exceptions.html
|
| 60 |
+
|
| 61 |
+
Catching exceptions
|
| 62 |
+
===================
|
| 63 |
+
|
| 64 |
+
Due to the lack of the ``except*`` syntax introduced by `PEP 654`_ in earlier Python
|
| 65 |
+
versions, you need to use ``exceptiongroup.catch()`` to catch exceptions that are
|
| 66 |
+
potentially nested inside an exception group. This function returns a context manager
|
| 67 |
+
that calls the given handler for any exceptions matching the sole argument.
|
| 68 |
+
|
| 69 |
+
The argument to ``catch()`` must be a dict (or any ``Mapping``) where each key is either
|
| 70 |
+
an exception class or an iterable of exception classes. Each value must be a callable
|
| 71 |
+
that takes a single positional argument. The handler will be called at most once, with
|
| 72 |
+
an exception group as an argument which will contain all the exceptions that are any
|
| 73 |
+
of the given types, or their subclasses. The exception group may contain nested groups
|
| 74 |
+
containing more matching exceptions.
|
| 75 |
+
|
| 76 |
+
Thus, the following Python 3.11+ code:
|
| 77 |
+
|
| 78 |
+
.. code-block:: python
|
| 79 |
+
|
| 80 |
+
try:
|
| 81 |
+
...
|
| 82 |
+
except* (ValueError, KeyError) as excgroup:
|
| 83 |
+
for exc in excgroup.exceptions:
|
| 84 |
+
print('Caught exception:', type(exc))
|
| 85 |
+
except* RuntimeError:
|
| 86 |
+
print('Caught runtime error')
|
| 87 |
+
|
| 88 |
+
would be written with this backport like this:
|
| 89 |
+
|
| 90 |
+
.. code-block:: python
|
| 91 |
+
|
| 92 |
+
from exceptiongroup import BaseExceptionGroup, catch
|
| 93 |
+
|
| 94 |
+
def value_key_err_handler(excgroup: BaseExceptionGroup) -> None:
|
| 95 |
+
for exc in excgroup.exceptions:
|
| 96 |
+
print('Caught exception:', type(exc))
|
| 97 |
+
|
| 98 |
+
def runtime_err_handler(exc: BaseExceptionGroup) -> None:
|
| 99 |
+
print('Caught runtime error')
|
| 100 |
+
|
| 101 |
+
with catch({
|
| 102 |
+
(ValueError, KeyError): value_key_err_handler,
|
| 103 |
+
RuntimeError: runtime_err_handler
|
| 104 |
+
}):
|
| 105 |
+
...
|
| 106 |
+
|
| 107 |
+
**NOTE**: Just like with ``except*``, you cannot handle ``BaseExceptionGroup`` or
|
| 108 |
+
``ExceptionGroup`` with ``catch()``.
|
| 109 |
+
|
| 110 |
+
Suppressing exceptions
|
| 111 |
+
======================
|
| 112 |
+
|
| 113 |
+
This library contains a backport of the ``contextlib.suppress()`` context manager from
|
| 114 |
+
Python 3.12.1. It allows you to selectively ignore certain exceptions, even when they're
|
| 115 |
+
inside exception groups:
|
| 116 |
+
|
| 117 |
+
.. code-block:: python
|
| 118 |
+
|
| 119 |
+
from exceptiongroup import suppress
|
| 120 |
+
|
| 121 |
+
with suppress(RuntimeError):
|
| 122 |
+
raise ExceptionGroup("", [RuntimeError("boo")])
|
| 123 |
+
|
| 124 |
+
Notes on monkey patching
|
| 125 |
+
========================
|
| 126 |
+
|
| 127 |
+
To make exception groups render properly when an unhandled exception group is being
|
| 128 |
+
printed out, this package does two things when it is imported on any Python version
|
| 129 |
+
earlier than 3.11:
|
| 130 |
+
|
| 131 |
+
#. The ``traceback.TracebackException`` class is monkey patched to store extra
|
| 132 |
+
information about exception groups (in ``__init__()``) and properly format them (in
|
| 133 |
+
``format()``)
|
| 134 |
+
#. An exception hook is installed at ``sys.excepthook``, provided that no other hook is
|
| 135 |
+
already present. This hook causes the exception to be formatted using
|
| 136 |
+
``traceback.TracebackException`` rather than the built-in rendered.
|
| 137 |
+
|
| 138 |
+
If ``sys.exceptionhook`` is found to be set to something else than the default when
|
| 139 |
+
``exceptiongroup`` is imported, no monkeypatching is done at all.
|
| 140 |
+
|
| 141 |
+
To prevent the exception hook and patches from being installed, set the environment
|
| 142 |
+
variable ``EXCEPTIONGROUP_NO_PATCH`` to ``1``.
|
| 143 |
+
|
| 144 |
+
Formatting exception groups
|
| 145 |
+
---------------------------
|
| 146 |
+
|
| 147 |
+
Normally, the monkey patching applied by this library on import will cause exception
|
| 148 |
+
groups to be printed properly in tracebacks. But in cases when the monkey patching is
|
| 149 |
+
blocked by a third party exception hook, or monkey patching is explicitly disabled,
|
| 150 |
+
you can still manually format exceptions using the special versions of the ``traceback``
|
| 151 |
+
functions, like ``format_exception()``, listed at the top of this page. They work just
|
| 152 |
+
like their counterparts in the ``traceback`` module, except that they use a separately
|
| 153 |
+
patched subclass of ``TracebackException`` to perform the rendering.
|
| 154 |
+
|
| 155 |
+
Particularly in cases where a library installs its own exception hook, it is recommended
|
| 156 |
+
to use these special versions to do the actual formatting of exceptions/tracebacks.
|
| 157 |
+
|
| 158 |
+
.. _PEP 654: https://www.python.org/dev/peps/pep-0654/
|
| 159 |
+
|
venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/RECORD
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
exceptiongroup-1.3.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
exceptiongroup-1.3.1.dist-info/METADATA,sha256=gZhKUjovelIq0SvqeEqLuF7ewIBeu9D7TjUBaaNt2AI,6725
|
| 3 |
+
exceptiongroup-1.3.1.dist-info/RECORD,,
|
| 4 |
+
exceptiongroup-1.3.1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
| 5 |
+
exceptiongroup-1.3.1.dist-info/licenses/LICENSE,sha256=blBw12UDHgrUA6HL-Qrm0ZoCKPgC4yC3rP9GCqcu1Hw,3704
|
| 6 |
+
exceptiongroup/__init__.py,sha256=7DHS0hDk-RIs3IQc3SbZVB0-1MhiSCJ9XgvEyEloL7M,1049
|
| 7 |
+
exceptiongroup/__pycache__/__init__.cpython-310.pyc,,
|
| 8 |
+
exceptiongroup/__pycache__/_catch.cpython-310.pyc,,
|
| 9 |
+
exceptiongroup/__pycache__/_exceptions.cpython-310.pyc,,
|
| 10 |
+
exceptiongroup/__pycache__/_formatting.cpython-310.pyc,,
|
| 11 |
+
exceptiongroup/__pycache__/_suppress.cpython-310.pyc,,
|
| 12 |
+
exceptiongroup/__pycache__/_version.cpython-310.pyc,,
|
| 13 |
+
exceptiongroup/_catch.py,sha256=CaJez3E-Jkr-7B7RT3fzusdLWnuyeekooSFn7KyWt9s,4680
|
| 14 |
+
exceptiongroup/_exceptions.py,sha256=wPwPsZ64SXEptuwb4XrTIa1Mc78uqF5vmCrXTdllLn4,11463
|
| 15 |
+
exceptiongroup/_formatting.py,sha256=OYTuT_T6TzM8G2v3DVt8LRBwMNyNK0tNl0fKMls3chM,21063
|
| 16 |
+
exceptiongroup/_suppress.py,sha256=LX11PRNpchwfNWwEMY92nYN1F_5qFenQcS8EjIONXKE,1772
|
| 17 |
+
exceptiongroup/_version.py,sha256=-4u7pjQ4caDQqa-1Qgms81j5hpkXjmjUYRCVEaLmb88,704
|
| 18 |
+
exceptiongroup/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: flit 3.12.0
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|
venv/lib/python3.10/site-packages/exceptiongroup-1.3.1.dist-info/licenses/LICENSE
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
The MIT License (MIT)
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2022 Alex Grönholm
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
| 6 |
+
this software and associated documentation files (the "Software"), to deal in
|
| 7 |
+
the Software without restriction, including without limitation the rights to
|
| 8 |
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
| 9 |
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
| 10 |
+
subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
| 17 |
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
| 18 |
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
| 19 |
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
| 20 |
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
This project contains code copied from the Python standard library.
|
| 24 |
+
The following is the required license notice for those parts.
|
| 25 |
+
|
| 26 |
+
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
|
| 27 |
+
--------------------------------------------
|
| 28 |
+
|
| 29 |
+
1. This LICENSE AGREEMENT is between the Python Software Foundation
|
| 30 |
+
("PSF"), and the Individual or Organization ("Licensee") accessing and
|
| 31 |
+
otherwise using this software ("Python") in source or binary form and
|
| 32 |
+
its associated documentation.
|
| 33 |
+
|
| 34 |
+
2. Subject to the terms and conditions of this License Agreement, PSF hereby
|
| 35 |
+
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
|
| 36 |
+
analyze, test, perform and/or display publicly, prepare derivative works,
|
| 37 |
+
distribute, and otherwise use Python alone or in any derivative version,
|
| 38 |
+
provided, however, that PSF's License Agreement and PSF's notice of copyright,
|
| 39 |
+
i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
| 40 |
+
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Python Software Foundation;
|
| 41 |
+
All Rights Reserved" are retained in Python alone or in any derivative version
|
| 42 |
+
prepared by Licensee.
|
| 43 |
+
|
| 44 |
+
3. In the event Licensee prepares a derivative work that is based on
|
| 45 |
+
or incorporates Python or any part thereof, and wants to make
|
| 46 |
+
the derivative work available to others as provided herein, then
|
| 47 |
+
Licensee hereby agrees to include in any such work a brief summary of
|
| 48 |
+
the changes made to Python.
|
| 49 |
+
|
| 50 |
+
4. PSF is making Python available to Licensee on an "AS IS"
|
| 51 |
+
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
|
| 52 |
+
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
|
| 53 |
+
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
|
| 54 |
+
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
|
| 55 |
+
INFRINGE ANY THIRD PARTY RIGHTS.
|
| 56 |
+
|
| 57 |
+
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
|
| 58 |
+
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
|
| 59 |
+
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
|
| 60 |
+
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
|
| 61 |
+
|
| 62 |
+
6. This License Agreement will automatically terminate upon a material
|
| 63 |
+
breach of its terms and conditions.
|
| 64 |
+
|
| 65 |
+
7. Nothing in this License Agreement shall be deemed to create any
|
| 66 |
+
relationship of agency, partnership, or joint venture between PSF and
|
| 67 |
+
Licensee. This License Agreement does not grant permission to use PSF
|
| 68 |
+
trademarks or trade name in a trademark sense to endorse or promote
|
| 69 |
+
products or services of Licensee, or any third party.
|
| 70 |
+
|
| 71 |
+
8. By copying, installing or otherwise using Python, Licensee
|
| 72 |
+
agrees to be bound by the terms and conditions of this License
|
| 73 |
+
Agreement.
|
venv/lib/python3.10/site-packages/exceptiongroup/__pycache__/_exceptions.cpython-310.pyc
ADDED
|
Binary file (10.3 kB). View file
|
|
|
venv/lib/python3.10/site-packages/exceptiongroup/__pycache__/_formatting.cpython-310.pyc
ADDED
|
Binary file (12.4 kB). View file
|
|
|
venv/lib/python3.10/site-packages/exceptiongroup/__pycache__/_suppress.cpython-310.pyc
ADDED
|
Binary file (1.67 kB). View file
|
|
|
venv/lib/python3.10/site-packages/exceptiongroup/__pycache__/_version.cpython-310.pyc
ADDED
|
Binary file (768 Bytes). View file
|
|
|
venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/METADATA
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: filelock
|
| 3 |
+
Version: 3.24.3
|
| 4 |
+
Summary: A platform independent file lock.
|
| 5 |
+
Project-URL: Documentation, https://py-filelock.readthedocs.io
|
| 6 |
+
Project-URL: Homepage, https://github.com/tox-dev/py-filelock
|
| 7 |
+
Project-URL: Source, https://github.com/tox-dev/py-filelock
|
| 8 |
+
Project-URL: Tracker, https://github.com/tox-dev/py-filelock/issues
|
| 9 |
+
Maintainer-email: Bernát Gábor <gaborjbernat@gmail.com>
|
| 10 |
+
License-Expression: MIT
|
| 11 |
+
License-File: LICENSE
|
| 12 |
+
Keywords: application,cache,directory,log,user
|
| 13 |
+
Classifier: Development Status :: 5 - Production/Stable
|
| 14 |
+
Classifier: Intended Audience :: Developers
|
| 15 |
+
Classifier: License :: OSI Approved :: MIT License
|
| 16 |
+
Classifier: Operating System :: OS Independent
|
| 17 |
+
Classifier: Programming Language :: Python
|
| 18 |
+
Classifier: Programming Language :: Python :: 3 :: Only
|
| 19 |
+
Classifier: Programming Language :: Python :: 3.10
|
| 20 |
+
Classifier: Programming Language :: Python :: 3.11
|
| 21 |
+
Classifier: Programming Language :: Python :: 3.12
|
| 22 |
+
Classifier: Programming Language :: Python :: 3.13
|
| 23 |
+
Classifier: Programming Language :: Python :: 3.14
|
| 24 |
+
Classifier: Topic :: Internet
|
| 25 |
+
Classifier: Topic :: Software Development :: Libraries
|
| 26 |
+
Classifier: Topic :: System
|
| 27 |
+
Requires-Python: >=3.10
|
| 28 |
+
Description-Content-Type: text/markdown
|
| 29 |
+
|
| 30 |
+
# filelock
|
| 31 |
+
|
| 32 |
+
[](https://pypi.org/project/filelock/)
|
| 33 |
+
[](https://pypi.org/project/filelock/)
|
| 34 |
+
[](https://py-filelock.readthedocs.io/en/latest/?badge=latest)
|
| 35 |
+
[](https://pepy.tech/project/filelock)
|
| 36 |
+
[](https://github.com/tox-dev/py-filelock/actions/workflows/check.yaml)
|
| 37 |
+
|
| 38 |
+
For more information checkout the [official documentation](https://py-filelock.readthedocs.io/en/latest/index.html).
|
venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/RECORD
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
filelock-3.24.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
filelock-3.24.3.dist-info/METADATA,sha256=2SzY43viecqbmVNOokZgXcYTE04sufy5Rjf6wZTMWC4,1977
|
| 3 |
+
filelock-3.24.3.dist-info/RECORD,,
|
| 4 |
+
filelock-3.24.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
| 5 |
+
filelock-3.24.3.dist-info/licenses/LICENSE,sha256=YIyJ1QYK6ZIa3M8yNmlbxlSplG4SMj72wCHfoE4pTUg,1088
|
| 6 |
+
filelock/__init__.py,sha256=12pKiL8AjSCkmWLozJhcCg6qQY70_XizYh9AHUvzZ1Q,2009
|
| 7 |
+
filelock/__pycache__/__init__.cpython-310.pyc,,
|
| 8 |
+
filelock/__pycache__/_api.cpython-310.pyc,,
|
| 9 |
+
filelock/__pycache__/_error.cpython-310.pyc,,
|
| 10 |
+
filelock/__pycache__/_read_write.cpython-310.pyc,,
|
| 11 |
+
filelock/__pycache__/_soft.cpython-310.pyc,,
|
| 12 |
+
filelock/__pycache__/_unix.cpython-310.pyc,,
|
| 13 |
+
filelock/__pycache__/_util.cpython-310.pyc,,
|
| 14 |
+
filelock/__pycache__/_windows.cpython-310.pyc,,
|
| 15 |
+
filelock/__pycache__/asyncio.cpython-310.pyc,,
|
| 16 |
+
filelock/__pycache__/version.cpython-310.pyc,,
|
| 17 |
+
filelock/_api.py,sha256=l8P7bqosgleI3vPn8OVGQ0-sa2d3WVhvM_j9SCCcTx4,21159
|
| 18 |
+
filelock/_error.py,sha256=mnelOh0EVyVeskG3rksL4kW3OArL4TMb2-PwmzQWIFg,788
|
| 19 |
+
filelock/_read_write.py,sha256=o6pcX04--nXFspDXtV_J113EDH_-rzz-Hvp-cmH76P4,15313
|
| 20 |
+
filelock/_soft.py,sha256=8aSSoyLZjBDV-ql3LJt_Ec_kg7ywHDE8ZZFcCtVWxQ8,4665
|
| 21 |
+
filelock/_unix.py,sha256=DL0mk9OAE21wY-uhdClJ2ezBUFfi-vNszdBJCw7slqE,4233
|
| 22 |
+
filelock/_util.py,sha256=wPf-LvzmdHWHw4DinDeVJlB0GNVOYGNUeNKSnHBQuUU,1716
|
| 23 |
+
filelock/_windows.py,sha256=-CEBIFfFPwmfSSmTmbbGzgUR30E0jBacBW9j4_ER0YM,3508
|
| 24 |
+
filelock/asyncio.py,sha256=NvrDsqS095NZc16l_OjBQcTY-D6xB4Vy7AK3ni8tr8A,13943
|
| 25 |
+
filelock/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 26 |
+
filelock/version.py,sha256=cEVcBJgzKOmekaY1NFrR1sXy1-sBY4F7inrTVfovZUA,706
|
venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: hatchling 1.28.0
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|
venv/lib/python3.10/site-packages/filelock-3.24.3.dist-info/licenses/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 Bernát Gábor and contributors
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
venv/lib/python3.10/site-packages/filelock/__init__.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
A platform independent file lock that supports the with-statement.
|
| 3 |
+
|
| 4 |
+
.. autodata:: filelock.__version__
|
| 5 |
+
:no-value:
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import sys
|
| 12 |
+
import warnings
|
| 13 |
+
from typing import TYPE_CHECKING
|
| 14 |
+
|
| 15 |
+
from ._api import AcquireReturnProxy, BaseFileLock
|
| 16 |
+
from ._error import Timeout
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
from ._read_write import ReadWriteLock
|
| 20 |
+
except ImportError: # sqlite3 may be unavailable if Python was built without it or the C library is missing
|
| 21 |
+
ReadWriteLock = None # type: ignore[assignment, misc]
|
| 22 |
+
|
| 23 |
+
from ._soft import SoftFileLock
|
| 24 |
+
from ._unix import UnixFileLock, has_fcntl
|
| 25 |
+
from ._windows import WindowsFileLock
|
| 26 |
+
from .asyncio import (
|
| 27 |
+
AsyncAcquireReturnProxy,
|
| 28 |
+
AsyncSoftFileLock,
|
| 29 |
+
AsyncUnixFileLock,
|
| 30 |
+
AsyncWindowsFileLock,
|
| 31 |
+
BaseAsyncFileLock,
|
| 32 |
+
)
|
| 33 |
+
from .version import version
|
| 34 |
+
|
| 35 |
+
#: version of the project as a string
|
| 36 |
+
__version__: str = version
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
if sys.platform == "win32": # pragma: win32 cover
|
| 40 |
+
_FileLock: type[BaseFileLock] = WindowsFileLock
|
| 41 |
+
_AsyncFileLock: type[BaseAsyncFileLock] = AsyncWindowsFileLock
|
| 42 |
+
else: # pragma: win32 no cover # noqa: PLR5501
|
| 43 |
+
if has_fcntl:
|
| 44 |
+
_FileLock: type[BaseFileLock] = UnixFileLock
|
| 45 |
+
_AsyncFileLock: type[BaseAsyncFileLock] = AsyncUnixFileLock
|
| 46 |
+
else:
|
| 47 |
+
_FileLock = SoftFileLock
|
| 48 |
+
_AsyncFileLock = AsyncSoftFileLock
|
| 49 |
+
if warnings is not None:
|
| 50 |
+
warnings.warn("only soft file lock is available", stacklevel=2)
|
| 51 |
+
|
| 52 |
+
if TYPE_CHECKING:
|
| 53 |
+
FileLock = SoftFileLock
|
| 54 |
+
AsyncFileLock = AsyncSoftFileLock
|
| 55 |
+
else:
|
| 56 |
+
#: Alias for the lock, which should be used for the current platform.
|
| 57 |
+
FileLock = _FileLock
|
| 58 |
+
AsyncFileLock = _AsyncFileLock
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
__all__ = [
|
| 62 |
+
"AcquireReturnProxy",
|
| 63 |
+
"AsyncAcquireReturnProxy",
|
| 64 |
+
"AsyncFileLock",
|
| 65 |
+
"AsyncSoftFileLock",
|
| 66 |
+
"AsyncUnixFileLock",
|
| 67 |
+
"AsyncWindowsFileLock",
|
| 68 |
+
"BaseAsyncFileLock",
|
| 69 |
+
"BaseFileLock",
|
| 70 |
+
"FileLock",
|
| 71 |
+
"ReadWriteLock",
|
| 72 |
+
"SoftFileLock",
|
| 73 |
+
"Timeout",
|
| 74 |
+
"UnixFileLock",
|
| 75 |
+
"WindowsFileLock",
|
| 76 |
+
"__version__",
|
| 77 |
+
]
|
venv/lib/python3.10/site-packages/filelock/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (1.59 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/_api.cpython-310.pyc
ADDED
|
Binary file (18.2 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/_error.cpython-310.pyc
ADDED
|
Binary file (1.56 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/_read_write.cpython-310.pyc
ADDED
|
Binary file (12.9 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/_soft.cpython-310.pyc
ADDED
|
Binary file (4.51 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/_unix.cpython-310.pyc
ADDED
|
Binary file (3.36 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/_util.cpython-310.pyc
ADDED
|
Binary file (1.62 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/_windows.cpython-310.pyc
ADDED
|
Binary file (2.97 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/asyncio.cpython-310.pyc
ADDED
|
Binary file (13.2 kB). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/__pycache__/version.cpython-310.pyc
ADDED
|
Binary file (762 Bytes). View file
|
|
|
venv/lib/python3.10/site-packages/filelock/_api.py
ADDED
|
@@ -0,0 +1,578 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import contextlib
|
| 4 |
+
import inspect
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
import pathlib
|
| 8 |
+
import sys
|
| 9 |
+
import time
|
| 10 |
+
import warnings
|
| 11 |
+
from abc import ABCMeta, abstractmethod
|
| 12 |
+
from dataclasses import dataclass
|
| 13 |
+
from threading import local
|
| 14 |
+
from typing import TYPE_CHECKING, Any, cast
|
| 15 |
+
from weakref import WeakValueDictionary
|
| 16 |
+
|
| 17 |
+
from ._error import Timeout
|
| 18 |
+
|
| 19 |
+
#: Sentinel indicating that no explicit file permission mode was passed.
|
| 20 |
+
#: When used, lock files are created with 0o666 (letting umask and default ACLs control the final permissions)
|
| 21 |
+
#: and fchmod is skipped so that POSIX default ACL inheritance is preserved.
|
| 22 |
+
_UNSET_FILE_MODE: int = -1
|
| 23 |
+
|
| 24 |
+
if TYPE_CHECKING:
|
| 25 |
+
from collections.abc import Callable
|
| 26 |
+
from types import TracebackType
|
| 27 |
+
|
| 28 |
+
from ._read_write import ReadWriteLock
|
| 29 |
+
|
| 30 |
+
if sys.version_info >= (3, 11): # pragma: no cover (py311+)
|
| 31 |
+
from typing import Self
|
| 32 |
+
else: # pragma: no cover (<py311)
|
| 33 |
+
from typing_extensions import Self
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
_LOGGER = logging.getLogger("filelock")
|
| 37 |
+
|
| 38 |
+
# On Windows os.path.realpath calls CreateFileW with share_mode=0, which blocks concurrent DeleteFileW and causes
|
| 39 |
+
# livelocks under threaded contention with SoftFileLock. os.path.abspath is purely string-based and avoids this.
|
| 40 |
+
_canonical = os.path.abspath if sys.platform == "win32" else os.path.realpath
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class _ThreadLocalRegistry(local):
|
| 44 |
+
def __init__(self) -> None:
|
| 45 |
+
super().__init__()
|
| 46 |
+
self.held: dict[str, int] = {}
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
_registry = _ThreadLocalRegistry()
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# This is a helper class which is returned by :meth:`BaseFileLock.acquire` and wraps the lock to make sure __enter__
|
| 53 |
+
# is not called twice when entering the with statement. If we would simply return *self*, the lock would be acquired
|
| 54 |
+
# again in the *__enter__* method of the BaseFileLock, but not released again automatically. issue #37 (memory leak)
|
| 55 |
+
class AcquireReturnProxy:
|
| 56 |
+
"""A context-aware object that will release the lock file when exiting."""
|
| 57 |
+
|
| 58 |
+
def __init__(self, lock: BaseFileLock | ReadWriteLock) -> None:
|
| 59 |
+
self.lock: BaseFileLock | ReadWriteLock = lock
|
| 60 |
+
|
| 61 |
+
def __enter__(self) -> BaseFileLock | ReadWriteLock:
|
| 62 |
+
return self.lock
|
| 63 |
+
|
| 64 |
+
def __exit__(
|
| 65 |
+
self,
|
| 66 |
+
exc_type: type[BaseException] | None,
|
| 67 |
+
exc_value: BaseException | None,
|
| 68 |
+
traceback: TracebackType | None,
|
| 69 |
+
) -> None:
|
| 70 |
+
self.lock.release()
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@dataclass
|
| 74 |
+
class FileLockContext:
|
| 75 |
+
"""A dataclass which holds the context for a ``BaseFileLock`` object."""
|
| 76 |
+
|
| 77 |
+
# The context is held in a separate class to allow optional use of thread local storage via the
|
| 78 |
+
# ThreadLocalFileContext class.
|
| 79 |
+
|
| 80 |
+
#: The path to the lock file.
|
| 81 |
+
lock_file: str
|
| 82 |
+
|
| 83 |
+
#: The default timeout value.
|
| 84 |
+
timeout: float
|
| 85 |
+
|
| 86 |
+
#: The mode for the lock files
|
| 87 |
+
mode: int
|
| 88 |
+
|
| 89 |
+
#: Whether the lock should be blocking or not
|
| 90 |
+
blocking: bool
|
| 91 |
+
|
| 92 |
+
#: The default polling interval value.
|
| 93 |
+
poll_interval: float
|
| 94 |
+
|
| 95 |
+
#: The lock lifetime in seconds; ``None`` means the lock never expires.
|
| 96 |
+
lifetime: float | None = None
|
| 97 |
+
|
| 98 |
+
#: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held
|
| 99 |
+
lock_file_fd: int | None = None
|
| 100 |
+
|
| 101 |
+
#: The lock counter is used for implementing the nested locking mechanism.
|
| 102 |
+
lock_counter: int = 0 # When the lock is acquired is increased and the lock is only released, when this value is 0
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
class ThreadLocalFileContext(FileLockContext, local):
|
| 106 |
+
"""A thread local version of the ``FileLockContext`` class."""
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class FileLockMeta(ABCMeta):
|
| 110 |
+
_instances: WeakValueDictionary[str, BaseFileLock]
|
| 111 |
+
|
| 112 |
+
def __call__( # noqa: PLR0913
|
| 113 |
+
cls,
|
| 114 |
+
lock_file: str | os.PathLike[str],
|
| 115 |
+
timeout: float = -1,
|
| 116 |
+
mode: int = _UNSET_FILE_MODE,
|
| 117 |
+
thread_local: bool = True, # noqa: FBT001, FBT002
|
| 118 |
+
*,
|
| 119 |
+
blocking: bool = True,
|
| 120 |
+
is_singleton: bool = False,
|
| 121 |
+
poll_interval: float = 0.05,
|
| 122 |
+
lifetime: float | None = None,
|
| 123 |
+
**kwargs: Any, # capture remaining kwargs for subclasses # noqa: ANN401
|
| 124 |
+
) -> BaseFileLock:
|
| 125 |
+
if is_singleton:
|
| 126 |
+
instance = cls._instances.get(str(lock_file))
|
| 127 |
+
if instance:
|
| 128 |
+
params_to_check = {
|
| 129 |
+
"thread_local": (thread_local, instance.is_thread_local()),
|
| 130 |
+
"timeout": (timeout, instance.timeout),
|
| 131 |
+
"mode": (mode, instance._context.mode), # noqa: SLF001
|
| 132 |
+
"blocking": (blocking, instance.blocking),
|
| 133 |
+
"poll_interval": (poll_interval, instance.poll_interval),
|
| 134 |
+
"lifetime": (lifetime, instance.lifetime),
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
non_matching_params = {
|
| 138 |
+
name: (passed_param, set_param)
|
| 139 |
+
for name, (passed_param, set_param) in params_to_check.items()
|
| 140 |
+
if passed_param != set_param
|
| 141 |
+
}
|
| 142 |
+
if not non_matching_params:
|
| 143 |
+
return cast("BaseFileLock", instance)
|
| 144 |
+
|
| 145 |
+
# parameters do not match; raise error
|
| 146 |
+
msg = "Singleton lock instances cannot be initialized with differing arguments"
|
| 147 |
+
msg += "\nNon-matching arguments: "
|
| 148 |
+
for param_name, (passed_param, set_param) in non_matching_params.items():
|
| 149 |
+
msg += f"\n\t{param_name} (existing lock has {set_param} but {passed_param} was passed)"
|
| 150 |
+
raise ValueError(msg)
|
| 151 |
+
|
| 152 |
+
# Workaround to make `__init__`'s params optional in subclasses
|
| 153 |
+
# E.g. virtualenv changes the signature of the `__init__` method in the `BaseFileLock` class descendant
|
| 154 |
+
# (https://github.com/tox-dev/filelock/pull/340)
|
| 155 |
+
|
| 156 |
+
all_params = {
|
| 157 |
+
"timeout": timeout,
|
| 158 |
+
"mode": mode,
|
| 159 |
+
"thread_local": thread_local,
|
| 160 |
+
"blocking": blocking,
|
| 161 |
+
"is_singleton": is_singleton,
|
| 162 |
+
"poll_interval": poll_interval,
|
| 163 |
+
"lifetime": lifetime,
|
| 164 |
+
**kwargs,
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
present_params = inspect.signature(cls.__init__).parameters
|
| 168 |
+
init_params = {key: value for key, value in all_params.items() if key in present_params}
|
| 169 |
+
|
| 170 |
+
instance = super().__call__(lock_file, **init_params)
|
| 171 |
+
|
| 172 |
+
if is_singleton:
|
| 173 |
+
cls._instances[str(lock_file)] = instance
|
| 174 |
+
|
| 175 |
+
return cast("BaseFileLock", instance)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
class BaseFileLock(contextlib.ContextDecorator, metaclass=FileLockMeta):
|
| 179 |
+
"""
|
| 180 |
+
Abstract base class for a file lock object.
|
| 181 |
+
|
| 182 |
+
Provides a reentrant, cross-process exclusive lock backed by OS-level primitives. Subclasses implement the actual
|
| 183 |
+
locking mechanism (:class:`UnixFileLock <filelock.UnixFileLock>`, :class:`WindowsFileLock
|
| 184 |
+
<filelock.WindowsFileLock>`, :class:`SoftFileLock <filelock.SoftFileLock>`).
|
| 185 |
+
|
| 186 |
+
"""
|
| 187 |
+
|
| 188 |
+
_instances: WeakValueDictionary[str, BaseFileLock]
|
| 189 |
+
|
| 190 |
+
def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None:
|
| 191 |
+
"""Setup unique state for lock subclasses."""
|
| 192 |
+
super().__init_subclass__(**kwargs)
|
| 193 |
+
cls._instances = WeakValueDictionary()
|
| 194 |
+
|
| 195 |
+
def __init__( # noqa: PLR0913
|
| 196 |
+
self,
|
| 197 |
+
lock_file: str | os.PathLike[str],
|
| 198 |
+
timeout: float = -1,
|
| 199 |
+
mode: int = _UNSET_FILE_MODE,
|
| 200 |
+
thread_local: bool = True, # noqa: FBT001, FBT002
|
| 201 |
+
*,
|
| 202 |
+
blocking: bool = True,
|
| 203 |
+
is_singleton: bool = False,
|
| 204 |
+
poll_interval: float = 0.05,
|
| 205 |
+
lifetime: float | None = None,
|
| 206 |
+
) -> None:
|
| 207 |
+
"""
|
| 208 |
+
Create a new lock object.
|
| 209 |
+
|
| 210 |
+
:param lock_file: path to the file
|
| 211 |
+
:param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in the
|
| 212 |
+
acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it to a
|
| 213 |
+
negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
|
| 214 |
+
:param mode: file permissions for the lockfile. When not specified, the OS controls permissions via umask and
|
| 215 |
+
default ACLs, preserving POSIX default ACL inheritance in shared directories.
|
| 216 |
+
:param thread_local: Whether this object's internal context should be thread local or not. If this is set to
|
| 217 |
+
``False`` then the lock will be reentrant across threads.
|
| 218 |
+
:param blocking: whether the lock should be blocking or not
|
| 219 |
+
:param is_singleton: If this is set to ``True`` then only one instance of this class will be created per lock
|
| 220 |
+
file. This is useful if you want to use the lock object for reentrant locking without needing to pass the
|
| 221 |
+
same object around.
|
| 222 |
+
:param poll_interval: default interval for polling the lock file, in seconds. It will be used as fallback value
|
| 223 |
+
in the acquire method, if no poll_interval value (``None``) is given.
|
| 224 |
+
:param lifetime: maximum time in seconds a lock can be held before it is considered expired. When set, a waiting
|
| 225 |
+
process will break a lock whose file modification time is older than ``lifetime`` seconds. ``None`` (the
|
| 226 |
+
default) means locks never expire.
|
| 227 |
+
|
| 228 |
+
"""
|
| 229 |
+
self._is_thread_local = thread_local
|
| 230 |
+
self._is_singleton = is_singleton
|
| 231 |
+
|
| 232 |
+
# Create the context. Note that external code should not work with the context directly and should instead use
|
| 233 |
+
# properties of this class.
|
| 234 |
+
kwargs: dict[str, Any] = {
|
| 235 |
+
"lock_file": os.fspath(lock_file),
|
| 236 |
+
"timeout": timeout,
|
| 237 |
+
"mode": mode,
|
| 238 |
+
"blocking": blocking,
|
| 239 |
+
"poll_interval": poll_interval,
|
| 240 |
+
"lifetime": lifetime,
|
| 241 |
+
}
|
| 242 |
+
self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs)
|
| 243 |
+
|
| 244 |
+
def is_thread_local(self) -> bool:
|
| 245 |
+
""":returns: a flag indicating if this lock is thread local or not"""
|
| 246 |
+
return self._is_thread_local
|
| 247 |
+
|
| 248 |
+
@property
|
| 249 |
+
def is_singleton(self) -> bool:
|
| 250 |
+
"""
|
| 251 |
+
:returns: a flag indicating if this lock is singleton or not
|
| 252 |
+
|
| 253 |
+
.. versionadded:: 3.13.0
|
| 254 |
+
|
| 255 |
+
"""
|
| 256 |
+
return self._is_singleton
|
| 257 |
+
|
| 258 |
+
@property
|
| 259 |
+
def lock_file(self) -> str:
|
| 260 |
+
""":returns: path to the lock file"""
|
| 261 |
+
return self._context.lock_file
|
| 262 |
+
|
| 263 |
+
@property
|
| 264 |
+
def timeout(self) -> float:
|
| 265 |
+
"""
|
| 266 |
+
:returns: the default timeout value, in seconds
|
| 267 |
+
|
| 268 |
+
.. versionadded:: 2.0.0
|
| 269 |
+
|
| 270 |
+
"""
|
| 271 |
+
return self._context.timeout
|
| 272 |
+
|
| 273 |
+
@timeout.setter
|
| 274 |
+
def timeout(self, value: float | str) -> None:
|
| 275 |
+
"""
|
| 276 |
+
Change the default timeout value.
|
| 277 |
+
|
| 278 |
+
:param value: the new value, in seconds
|
| 279 |
+
|
| 280 |
+
"""
|
| 281 |
+
self._context.timeout = float(value)
|
| 282 |
+
|
| 283 |
+
@property
|
| 284 |
+
def blocking(self) -> bool:
|
| 285 |
+
"""
|
| 286 |
+
:returns: whether the locking is blocking or not
|
| 287 |
+
|
| 288 |
+
.. versionadded:: 3.14.0
|
| 289 |
+
|
| 290 |
+
"""
|
| 291 |
+
return self._context.blocking
|
| 292 |
+
|
| 293 |
+
@blocking.setter
|
| 294 |
+
def blocking(self, value: bool) -> None:
|
| 295 |
+
"""
|
| 296 |
+
Change the default blocking value.
|
| 297 |
+
|
| 298 |
+
:param value: the new value as bool
|
| 299 |
+
|
| 300 |
+
"""
|
| 301 |
+
self._context.blocking = value
|
| 302 |
+
|
| 303 |
+
@property
|
| 304 |
+
def poll_interval(self) -> float:
|
| 305 |
+
"""
|
| 306 |
+
:returns: the default polling interval, in seconds
|
| 307 |
+
|
| 308 |
+
.. versionadded:: 3.24.0
|
| 309 |
+
|
| 310 |
+
"""
|
| 311 |
+
return self._context.poll_interval
|
| 312 |
+
|
| 313 |
+
@poll_interval.setter
|
| 314 |
+
def poll_interval(self, value: float) -> None:
|
| 315 |
+
"""
|
| 316 |
+
Change the default polling interval.
|
| 317 |
+
|
| 318 |
+
:param value: the new value, in seconds
|
| 319 |
+
|
| 320 |
+
"""
|
| 321 |
+
self._context.poll_interval = value
|
| 322 |
+
|
| 323 |
+
@property
|
| 324 |
+
def lifetime(self) -> float | None:
|
| 325 |
+
"""
|
| 326 |
+
:returns: the lock lifetime in seconds, or ``None`` if the lock never expires
|
| 327 |
+
|
| 328 |
+
.. versionadded:: 3.24.0
|
| 329 |
+
|
| 330 |
+
"""
|
| 331 |
+
return self._context.lifetime
|
| 332 |
+
|
| 333 |
+
@lifetime.setter
|
| 334 |
+
def lifetime(self, value: float | None) -> None:
|
| 335 |
+
"""
|
| 336 |
+
Change the lock lifetime.
|
| 337 |
+
|
| 338 |
+
:param value: the new value in seconds, or ``None`` to disable expiration
|
| 339 |
+
|
| 340 |
+
"""
|
| 341 |
+
self._context.lifetime = value
|
| 342 |
+
|
| 343 |
+
@property
|
| 344 |
+
def mode(self) -> int:
|
| 345 |
+
""":returns: the file permissions for the lockfile"""
|
| 346 |
+
return 0o644 if self._context.mode == _UNSET_FILE_MODE else self._context.mode
|
| 347 |
+
|
| 348 |
+
@property
|
| 349 |
+
def has_explicit_mode(self) -> bool:
|
| 350 |
+
""":returns: whether the file permissions were explicitly set"""
|
| 351 |
+
return self._context.mode != _UNSET_FILE_MODE
|
| 352 |
+
|
| 353 |
+
def _open_mode(self) -> int:
|
| 354 |
+
""":returns: the mode for os.open() — 0o666 when unset (let umask/ACLs decide), else the explicit mode"""
|
| 355 |
+
return 0o666 if self._context.mode == _UNSET_FILE_MODE else self._context.mode
|
| 356 |
+
|
| 357 |
+
def _try_break_expired_lock(self) -> None:
|
| 358 |
+
"""Remove the lock file if its modification time exceeds the configured :attr:`lifetime`."""
|
| 359 |
+
if (lifetime := self._context.lifetime) is None:
|
| 360 |
+
return
|
| 361 |
+
with contextlib.suppress(OSError):
|
| 362 |
+
if time.time() - pathlib.Path(self.lock_file).stat().st_mtime < lifetime:
|
| 363 |
+
return
|
| 364 |
+
break_path = f"{self.lock_file}.break.{os.getpid()}"
|
| 365 |
+
pathlib.Path(self.lock_file).rename(break_path)
|
| 366 |
+
pathlib.Path(break_path).unlink()
|
| 367 |
+
|
| 368 |
+
@abstractmethod
|
| 369 |
+
def _acquire(self) -> None:
|
| 370 |
+
"""If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file."""
|
| 371 |
+
raise NotImplementedError
|
| 372 |
+
|
| 373 |
+
@abstractmethod
|
| 374 |
+
def _release(self) -> None:
|
| 375 |
+
"""Releases the lock and sets self._context.lock_file_fd to None."""
|
| 376 |
+
raise NotImplementedError
|
| 377 |
+
|
| 378 |
+
@property
|
| 379 |
+
def is_locked(self) -> bool:
|
| 380 |
+
"""
|
| 381 |
+
:returns: A boolean indicating if the lock file is holding the lock currently.
|
| 382 |
+
|
| 383 |
+
.. versionchanged:: 2.0.0
|
| 384 |
+
|
| 385 |
+
This was previously a method and is now a property.
|
| 386 |
+
|
| 387 |
+
"""
|
| 388 |
+
return self._context.lock_file_fd is not None
|
| 389 |
+
|
| 390 |
+
@property
|
| 391 |
+
def lock_counter(self) -> int:
|
| 392 |
+
""":returns: The number of times this lock has been acquired (but not yet released)."""
|
| 393 |
+
return self._context.lock_counter
|
| 394 |
+
|
| 395 |
+
@staticmethod
|
| 396 |
+
def _check_give_up( # noqa: PLR0913
|
| 397 |
+
lock_id: int,
|
| 398 |
+
lock_filename: str,
|
| 399 |
+
*,
|
| 400 |
+
blocking: bool,
|
| 401 |
+
cancel_check: Callable[[], bool] | None,
|
| 402 |
+
timeout: float,
|
| 403 |
+
start_time: float,
|
| 404 |
+
) -> bool:
|
| 405 |
+
if blocking is False:
|
| 406 |
+
_LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
|
| 407 |
+
return True
|
| 408 |
+
if cancel_check is not None and cancel_check():
|
| 409 |
+
_LOGGER.debug("Cancellation requested for lock %s on %s", lock_id, lock_filename)
|
| 410 |
+
return True
|
| 411 |
+
if 0 <= timeout < time.perf_counter() - start_time:
|
| 412 |
+
_LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
|
| 413 |
+
return True
|
| 414 |
+
return False
|
| 415 |
+
|
| 416 |
+
def acquire( # noqa: C901
|
| 417 |
+
self,
|
| 418 |
+
timeout: float | None = None,
|
| 419 |
+
poll_interval: float | None = None,
|
| 420 |
+
*,
|
| 421 |
+
poll_intervall: float | None = None,
|
| 422 |
+
blocking: bool | None = None,
|
| 423 |
+
cancel_check: Callable[[], bool] | None = None,
|
| 424 |
+
) -> AcquireReturnProxy:
|
| 425 |
+
"""
|
| 426 |
+
Try to acquire the file lock.
|
| 427 |
+
|
| 428 |
+
:param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and
|
| 429 |
+
if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired
|
| 430 |
+
:param poll_interval: interval of trying to acquire the lock file, ``None`` means use the default
|
| 431 |
+
:attr:`~poll_interval`
|
| 432 |
+
:param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
|
| 433 |
+
:param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the
|
| 434 |
+
first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
|
| 435 |
+
:param cancel_check: a callable returning ``True`` when the acquisition should be canceled. Checked on each poll
|
| 436 |
+
iteration. When triggered, raises :class:`~Timeout` just like an expired timeout.
|
| 437 |
+
|
| 438 |
+
:returns: a context object that will unlock the file when the context is exited
|
| 439 |
+
|
| 440 |
+
:raises Timeout: if fails to acquire lock within the timeout period
|
| 441 |
+
|
| 442 |
+
.. code-block:: python
|
| 443 |
+
|
| 444 |
+
# You can use this method in the context manager (recommended)
|
| 445 |
+
with lock.acquire():
|
| 446 |
+
pass
|
| 447 |
+
|
| 448 |
+
# Or use an equivalent try-finally construct:
|
| 449 |
+
lock.acquire()
|
| 450 |
+
try:
|
| 451 |
+
pass
|
| 452 |
+
finally:
|
| 453 |
+
lock.release()
|
| 454 |
+
|
| 455 |
+
.. versionchanged:: 2.0.0
|
| 456 |
+
|
| 457 |
+
This method returns now a *proxy* object instead of *self*, so that it can be used in a with statement
|
| 458 |
+
without side effects.
|
| 459 |
+
|
| 460 |
+
"""
|
| 461 |
+
# Use the default timeout, if no timeout is provided.
|
| 462 |
+
if timeout is None:
|
| 463 |
+
timeout = self._context.timeout
|
| 464 |
+
|
| 465 |
+
if blocking is None:
|
| 466 |
+
blocking = self._context.blocking
|
| 467 |
+
|
| 468 |
+
if poll_intervall is not None:
|
| 469 |
+
msg = "use poll_interval instead of poll_intervall"
|
| 470 |
+
warnings.warn(msg, DeprecationWarning, stacklevel=2)
|
| 471 |
+
poll_interval = poll_intervall
|
| 472 |
+
|
| 473 |
+
poll_interval = poll_interval if poll_interval is not None else self._context.poll_interval
|
| 474 |
+
|
| 475 |
+
# Increment the number right at the beginning. We can still undo it, if something fails.
|
| 476 |
+
self._context.lock_counter += 1
|
| 477 |
+
|
| 478 |
+
lock_id = id(self)
|
| 479 |
+
lock_filename = self.lock_file
|
| 480 |
+
canonical = _canonical(lock_filename)
|
| 481 |
+
|
| 482 |
+
would_block = self._context.lock_counter == 1 and not self.is_locked and timeout < 0 and blocking
|
| 483 |
+
if would_block and (existing := _registry.held.get(canonical)) is not None and existing != lock_id:
|
| 484 |
+
self._context.lock_counter -= 1
|
| 485 |
+
msg = (
|
| 486 |
+
f"Deadlock: lock '{lock_filename}' is already held by a different "
|
| 487 |
+
f"FileLock instance in this thread. Use is_singleton=True to "
|
| 488 |
+
f"enable reentrant locking across instances."
|
| 489 |
+
)
|
| 490 |
+
raise RuntimeError(msg)
|
| 491 |
+
|
| 492 |
+
start_time = time.perf_counter()
|
| 493 |
+
try:
|
| 494 |
+
while True:
|
| 495 |
+
if not self.is_locked:
|
| 496 |
+
self._try_break_expired_lock()
|
| 497 |
+
_LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
|
| 498 |
+
self._acquire()
|
| 499 |
+
if self.is_locked:
|
| 500 |
+
_LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
|
| 501 |
+
break
|
| 502 |
+
if self._check_give_up(
|
| 503 |
+
lock_id,
|
| 504 |
+
lock_filename,
|
| 505 |
+
blocking=blocking,
|
| 506 |
+
cancel_check=cancel_check,
|
| 507 |
+
timeout=timeout,
|
| 508 |
+
start_time=start_time,
|
| 509 |
+
):
|
| 510 |
+
raise Timeout(lock_filename) # noqa: TRY301
|
| 511 |
+
msg = "Lock %s not acquired on %s, waiting %s seconds ..."
|
| 512 |
+
_LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
|
| 513 |
+
time.sleep(poll_interval)
|
| 514 |
+
except BaseException:
|
| 515 |
+
self._context.lock_counter = max(0, self._context.lock_counter - 1)
|
| 516 |
+
if self._context.lock_counter == 0:
|
| 517 |
+
_registry.held.pop(canonical, None)
|
| 518 |
+
raise
|
| 519 |
+
if self._context.lock_counter == 1:
|
| 520 |
+
_registry.held[canonical] = lock_id
|
| 521 |
+
return AcquireReturnProxy(lock=self)
|
| 522 |
+
|
| 523 |
+
def release(self, force: bool = False) -> None: # noqa: FBT001, FBT002
|
| 524 |
+
"""
|
| 525 |
+
Release the file lock. The lock is only completely released when the lock counter reaches 0. The lock file
|
| 526 |
+
itself is not automatically deleted.
|
| 527 |
+
|
| 528 |
+
:param force: If true, the lock counter is ignored and the lock is released in every case.
|
| 529 |
+
|
| 530 |
+
"""
|
| 531 |
+
if self.is_locked:
|
| 532 |
+
self._context.lock_counter -= 1
|
| 533 |
+
|
| 534 |
+
if self._context.lock_counter == 0 or force:
|
| 535 |
+
lock_id, lock_filename = id(self), self.lock_file
|
| 536 |
+
|
| 537 |
+
_LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
|
| 538 |
+
self._release()
|
| 539 |
+
self._context.lock_counter = 0
|
| 540 |
+
_registry.held.pop(_canonical(lock_filename), None)
|
| 541 |
+
_LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
|
| 542 |
+
|
| 543 |
+
def __enter__(self) -> Self:
|
| 544 |
+
"""
|
| 545 |
+
Acquire the lock.
|
| 546 |
+
|
| 547 |
+
:returns: the lock object
|
| 548 |
+
|
| 549 |
+
"""
|
| 550 |
+
self.acquire()
|
| 551 |
+
return self
|
| 552 |
+
|
| 553 |
+
def __exit__(
|
| 554 |
+
self,
|
| 555 |
+
exc_type: type[BaseException] | None,
|
| 556 |
+
exc_value: BaseException | None,
|
| 557 |
+
traceback: TracebackType | None,
|
| 558 |
+
) -> None:
|
| 559 |
+
"""
|
| 560 |
+
Release the lock.
|
| 561 |
+
|
| 562 |
+
:param exc_type: the exception type if raised
|
| 563 |
+
:param exc_value: the exception value if raised
|
| 564 |
+
:param traceback: the exception traceback if raised
|
| 565 |
+
|
| 566 |
+
"""
|
| 567 |
+
self.release()
|
| 568 |
+
|
| 569 |
+
def __del__(self) -> None:
|
| 570 |
+
"""Called when the lock object is deleted."""
|
| 571 |
+
self.release(force=True)
|
| 572 |
+
|
| 573 |
+
|
| 574 |
+
__all__ = [
|
| 575 |
+
"_UNSET_FILE_MODE",
|
| 576 |
+
"AcquireReturnProxy",
|
| 577 |
+
"BaseFileLock",
|
| 578 |
+
]
|
venv/lib/python3.10/site-packages/filelock/_error.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Timeout(TimeoutError): # noqa: N818
|
| 7 |
+
"""Raised when the lock could not be acquired in *timeout* seconds."""
|
| 8 |
+
|
| 9 |
+
def __init__(self, lock_file: str) -> None:
|
| 10 |
+
super().__init__()
|
| 11 |
+
self._lock_file = lock_file
|
| 12 |
+
|
| 13 |
+
def __reduce__(self) -> str | tuple[Any, ...]:
|
| 14 |
+
return self.__class__, (self._lock_file,) # Properly pickle the exception
|
| 15 |
+
|
| 16 |
+
def __str__(self) -> str:
|
| 17 |
+
return f"The file lock '{self._lock_file}' could not be acquired."
|
| 18 |
+
|
| 19 |
+
def __repr__(self) -> str:
|
| 20 |
+
return f"{self.__class__.__name__}({self.lock_file!r})"
|
| 21 |
+
|
| 22 |
+
@property
|
| 23 |
+
def lock_file(self) -> str:
|
| 24 |
+
""":returns: The path of the file lock."""
|
| 25 |
+
return self._lock_file
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
__all__ = [
|
| 29 |
+
"Timeout",
|
| 30 |
+
]
|
venv/lib/python3.10/site-packages/filelock/_read_write.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import atexit
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import pathlib
|
| 7 |
+
import sqlite3
|
| 8 |
+
import threading
|
| 9 |
+
import time
|
| 10 |
+
from contextlib import contextmanager, suppress
|
| 11 |
+
from typing import TYPE_CHECKING, Literal
|
| 12 |
+
from weakref import WeakValueDictionary
|
| 13 |
+
|
| 14 |
+
from ._api import AcquireReturnProxy
|
| 15 |
+
from ._error import Timeout
|
| 16 |
+
|
| 17 |
+
if TYPE_CHECKING:
|
| 18 |
+
from collections.abc import Generator
|
| 19 |
+
|
| 20 |
+
_LOGGER = logging.getLogger("filelock")

# Registry of every SQLite connection opened by ReadWriteLock instances, so they
# can all be closed at interpreter shutdown.
_all_connections: set[sqlite3.Connection] = set()
_all_connections_lock = threading.Lock()


def _cleanup_connections() -> None:
    """Close every tracked connection at interpreter exit, ignoring already-closed handles."""
    with _all_connections_lock:
        while _all_connections:
            connection = _all_connections.pop()
            with suppress(Exception):
                connection.close()


atexit.register(_cleanup_connections)

# sqlite3_busy_timeout() accepts a C int, max 2_147_483_647 on 32-bit. Use a lower value to be safe (~23 days).
_MAX_SQLITE_TIMEOUT_MS = 2_000_000_000 - 1


def timeout_for_sqlite(timeout: float, *, blocking: bool, already_waited: float) -> int:
    """
    Translate a filelock-style timeout into a millisecond value for SQLite's busy_timeout pragma.

    :param timeout: seconds to wait; ``-1`` means wait indefinitely
    :param blocking: ``False`` means fail immediately (0 ms)
    :param already_waited: seconds already spent waiting, subtracted from *timeout*

    :returns: a non-negative millisecond count, clamped to ``_MAX_SQLITE_TIMEOUT_MS``

    :raises ValueError: if *timeout* is negative and not ``-1``

    """
    if blocking is False:
        return 0

    if timeout == -1:
        return _MAX_SQLITE_TIMEOUT_MS

    if timeout < 0:
        raise ValueError("timeout must be a non-negative number or -1")

    # timeout == 0 means "no wait"; otherwise charge the time already spent, never going below zero.
    remaining = timeout if timeout <= 0 else max(timeout - already_waited, 0)
    milliseconds = int(remaining * 1000)
    if not 0 <= milliseconds <= _MAX_SQLITE_TIMEOUT_MS:
        _LOGGER.warning("timeout %s is too large for SQLite, using %s ms instead", timeout, _MAX_SQLITE_TIMEOUT_MS)
        return _MAX_SQLITE_TIMEOUT_MS
    return milliseconds
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class _ReadWriteLockMeta(type):
    """
    Metaclass that handles singleton resolution when is_singleton=True.

    Singleton logic lives here rather than in ReadWriteLock.get_lock so that ``ReadWriteLock(path)`` transparently
    returns cached instances without a 2-arg ``super()`` call that type checkers cannot verify.

    """

    # Populated on the ReadWriteLock class body; declared here so the metaclass can reference them.
    _instances: WeakValueDictionary[pathlib.Path, ReadWriteLock]
    _instances_lock: threading.Lock

    def __call__(
        cls,
        lock_file: str | os.PathLike[str],
        timeout: float = -1,
        *,
        blocking: bool = True,
        is_singleton: bool = True,
    ) -> ReadWriteLock:
        # Non-singleton requests bypass the cache entirely and always build a fresh instance.
        if not is_singleton:
            return super().__call__(lock_file, timeout, blocking=blocking, is_singleton=is_singleton)

        # Resolve the path so that different spellings of the same file map to one cache entry.
        normalized = pathlib.Path(lock_file).resolve()
        with cls._instances_lock:
            if normalized not in cls._instances:
                instance = super().__call__(lock_file, timeout, blocking=blocking, is_singleton=is_singleton)
                cls._instances[normalized] = instance
            else:
                instance = cls._instances[normalized]

            # A freshly built instance trivially matches; this only rejects a cached instance whose
            # configuration differs from what the caller asked for.
            if instance.timeout != timeout or instance.blocking != blocking:
                msg = (
                    f"Singleton lock created with timeout={instance.timeout}, blocking={instance.blocking},"
                    f" cannot be changed to timeout={timeout}, blocking={blocking}"
                )
                raise ValueError(msg)
        return instance
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class ReadWriteLock(metaclass=_ReadWriteLockMeta):
    """
    Cross-process read-write lock backed by SQLite.

    Allows concurrent shared readers or a single exclusive writer. The lock is reentrant within the same mode (multiple
    ``acquire_read`` calls nest, as do multiple ``acquire_write`` calls from the same thread), but upgrading from read
    to write or downgrading from write to read raises :class:`RuntimeError`. Write locks are pinned to the thread that
    acquired them.

    By default, ``is_singleton=True``: calling ``ReadWriteLock(path)`` with the same resolved path returns the same
    instance. The lock file must use a ``.db`` extension (SQLite database).

    :param lock_file: path to the SQLite database file used as the lock
    :param timeout: maximum wait time in seconds; ``-1`` means block indefinitely
    :param blocking: if ``False``, raise :class:`~filelock.Timeout` immediately when the lock is unavailable
    :param is_singleton: if ``True``, reuse existing instances for the same resolved path

    .. versionadded:: 3.21.0

    """

    # Singleton registry consumed by _ReadWriteLockMeta; weak values let unused locks be garbage collected.
    _instances: WeakValueDictionary[pathlib.Path, ReadWriteLock] = WeakValueDictionary()
    _instances_lock = threading.Lock()

    @classmethod
    def get_lock(
        cls, lock_file: str | os.PathLike[str], timeout: float = -1, *, blocking: bool = True
    ) -> ReadWriteLock:
        """
        Return the singleton :class:`ReadWriteLock` for *lock_file*.

        :param lock_file: path to the SQLite database file used as the lock
        :param timeout: maximum wait time in seconds; ``-1`` means block indefinitely
        :param blocking: if ``False``, raise :class:`~filelock.Timeout` immediately when the lock is unavailable

        :returns: the singleton lock instance

        :raises ValueError: if an instance already exists for this path with different *timeout* or *blocking* values

        """
        return cls(lock_file, timeout, blocking=blocking)

    def __init__(
        self,
        lock_file: str | os.PathLike[str],
        timeout: float = -1,
        *,
        blocking: bool = True,
        is_singleton: bool = True,  # noqa: ARG002 # consumed by _ReadWriteLockMeta.__call__
    ) -> None:
        self.lock_file = os.fspath(lock_file)
        self.timeout = timeout
        self.blocking = blocking
        self._transaction_lock = threading.Lock()  # serializes the (possibly blocking) SQLite transaction work
        self._internal_lock = threading.Lock()  # protects _lock_level / _current_mode updates and rollback
        self._lock_level = 0  # reentrancy depth; 0 means not held
        self._current_mode: Literal["read", "write"] | None = None
        self._write_thread_id: int | None = None
        # check_same_thread=False: the connection is shared across threads, guarded by the locks above.
        self._con = sqlite3.connect(self.lock_file, check_same_thread=False)
        with _all_connections_lock:
            _all_connections.add(self._con)

    def _acquire_transaction_lock(self, *, blocking: bool, timeout: float) -> None:
        """Acquire the intra-process transaction lock, raising :class:`Timeout` on failure."""
        if timeout == -1:
            # blocking=True with no timeout means wait indefinitely per threading.Lock.acquire semantics
            acquired = self._transaction_lock.acquire(blocking)
        else:
            acquired = self._transaction_lock.acquire(blocking, timeout)
        if not acquired:
            raise Timeout(self.lock_file) from None

    def _validate_reentrant(self, mode: Literal["read", "write"], opposite: str, direction: str) -> AcquireReturnProxy:
        """Handle re-entry when the lock is already held; caller must hold _internal_lock."""
        if self._current_mode != mode:
            msg = (
                f"Cannot acquire {mode} lock on {self.lock_file} (lock id: {id(self)}): "
                f"already holding a {opposite} lock ({direction} not allowed)"
            )
            raise RuntimeError(msg)
        if mode == "write" and (cur := threading.get_ident()) != self._write_thread_id:
            msg = (
                f"Cannot acquire write lock on {self.lock_file} (lock id: {id(self)}) "
                f"from thread {cur} while it is held by thread {self._write_thread_id}"
            )
            raise RuntimeError(msg)
        self._lock_level += 1
        return AcquireReturnProxy(lock=self)

    def _configure_and_begin(
        self, mode: Literal["read", "write"], timeout: float, *, blocking: bool, start_time: float
    ) -> None:
        """Configure busy_timeout/journal_mode and open the SQLite transaction that embodies the lock."""
        waited = time.perf_counter() - start_time
        timeout_ms = timeout_for_sqlite(timeout, blocking=blocking, already_waited=waited)
        self._con.execute(f"PRAGMA busy_timeout={timeout_ms};").close()
        # Use legacy journal mode (not WAL) because WAL does not block readers when a concurrent EXCLUSIVE
        # write transaction is active, making read-write locking impossible without modifying table data.
        # MEMORY is safe here since no actual writes happen — crashes cannot corrupt the DB.
        # See https://sqlite.org/lang_transaction.html#deferred_immediate_and_exclusive_transactions
        #
        # Set here (not in __init__) because this pragma itself may block on a locked database,
        # so it must run after busy_timeout is configured above.
        self._con.execute("PRAGMA journal_mode=MEMORY;").close()
        # Recompute remaining timeout after the potentially blocking journal_mode pragma.
        waited = time.perf_counter() - start_time
        if (recomputed := timeout_for_sqlite(timeout, blocking=blocking, already_waited=waited)) != timeout_ms:
            self._con.execute(f"PRAGMA busy_timeout={recomputed};").close()
        stmt = "BEGIN EXCLUSIVE TRANSACTION;" if mode == "write" else "BEGIN TRANSACTION;"
        self._con.execute(stmt).close()
        if mode == "read":
            # A SELECT is needed to force SQLite to actually acquire the SHARED lock on the database.
            # https://www.sqlite.org/lockingv3.html#transaction_control
            self._con.execute("SELECT name FROM sqlite_schema LIMIT 1;").close()

    def _acquire(self, mode: Literal["read", "write"], timeout: float, *, blocking: bool) -> AcquireReturnProxy:
        """Shared implementation behind :meth:`acquire_read` and :meth:`acquire_write`."""
        opposite = "write" if mode == "read" else "read"
        direction = "downgrade" if mode == "read" else "upgrade"

        # Fast path: already held by this instance — just validate and bump the level.
        with self._internal_lock:
            if self._lock_level > 0:
                return self._validate_reentrant(mode, opposite, direction)

        start_time = time.perf_counter()
        self._acquire_transaction_lock(blocking=blocking, timeout=timeout)
        try:
            # Double-check: another thread may have acquired the lock while we waited on _transaction_lock.
            with self._internal_lock:
                if self._lock_level > 0:
                    return self._validate_reentrant(mode, opposite, direction)

            self._configure_and_begin(mode, timeout, blocking=blocking, start_time=start_time)

            with self._internal_lock:
                self._current_mode = mode
                self._lock_level = 1
                if mode == "write":
                    self._write_thread_id = threading.get_ident()

            return AcquireReturnProxy(lock=self)

        except sqlite3.OperationalError as exc:
            # "database is locked" is SQLite's busy-timeout expiry: report it as our Timeout.
            if "database is locked" not in str(exc):
                raise
            raise Timeout(self.lock_file) from None
        finally:
            self._transaction_lock.release()

    def acquire_read(self, timeout: float = -1, *, blocking: bool = True) -> AcquireReturnProxy:
        """
        Acquire a shared read lock.

        If this instance already holds a read lock, the lock level is incremented (reentrant). Attempting to acquire a
        read lock while holding a write lock raises :class:`RuntimeError` (downgrade not allowed).

        :param timeout: maximum wait time in seconds; ``-1`` means block indefinitely
        :param blocking: if ``False``, raise :class:`~filelock.Timeout` immediately when the lock is unavailable

        :returns: a proxy that can be used as a context manager to release the lock

        :raises RuntimeError: if a write lock is already held on this instance
        :raises Timeout: if the lock cannot be acquired within *timeout* seconds

        """
        return self._acquire("read", timeout, blocking=blocking)

    def acquire_write(self, timeout: float = -1, *, blocking: bool = True) -> AcquireReturnProxy:
        """
        Acquire an exclusive write lock.

        If this instance already holds a write lock from the same thread, the lock level is incremented (reentrant).
        Attempting to acquire a write lock while holding a read lock raises :class:`RuntimeError` (upgrade not allowed).
        Write locks are pinned to the acquiring thread: a different thread trying to re-enter also raises
        :class:`RuntimeError`.

        :param timeout: maximum wait time in seconds; ``-1`` means block indefinitely
        :param blocking: if ``False``, raise :class:`~filelock.Timeout` immediately when the lock is unavailable

        :returns: a proxy that can be used as a context manager to release the lock

        :raises RuntimeError: if a read lock is already held, or a write lock is held by a different thread
        :raises Timeout: if the lock cannot be acquired within *timeout* seconds

        """
        return self._acquire("write", timeout, blocking=blocking)

    def release(self, *, force: bool = False) -> None:
        """
        Release one level of the current lock.

        When the lock level reaches zero the underlying SQLite transaction is rolled back, releasing the database lock.

        :param force: if ``True``, release the lock completely regardless of the current lock level

        :raises RuntimeError: if no lock is currently held and *force* is ``False``

        """
        should_rollback = False
        with self._internal_lock:
            if self._lock_level == 0:
                if force:
                    return
                msg = f"Cannot release a lock on {self.lock_file} (lock id: {id(self)}) that is not held"
                raise RuntimeError(msg)
            if force:
                self._lock_level = 0
            else:
                self._lock_level -= 1
            if self._lock_level == 0:
                self._current_mode = None
                self._write_thread_id = None
                should_rollback = True
        # Rollback outside _internal_lock: it may block briefly and needs no state protection.
        if should_rollback:
            self._con.rollback()

    @contextmanager
    def read_lock(self, timeout: float | None = None, *, blocking: bool | None = None) -> Generator[None]:
        """
        Context manager that acquires and releases a shared read lock.

        Falls back to instance defaults for *timeout* and *blocking* when ``None``.

        :param timeout: maximum wait time in seconds, or ``None`` to use the instance default
        :param blocking: if ``False``, raise :class:`~filelock.Timeout` immediately; ``None`` uses the instance default

        """
        if timeout is None:
            timeout = self.timeout
        if blocking is None:
            blocking = self.blocking
        self.acquire_read(timeout, blocking=blocking)
        try:
            yield
        finally:
            self.release()

    @contextmanager
    def write_lock(self, timeout: float | None = None, *, blocking: bool | None = None) -> Generator[None]:
        """
        Context manager that acquires and releases an exclusive write lock.

        Falls back to instance defaults for *timeout* and *blocking* when ``None``.

        :param timeout: maximum wait time in seconds, or ``None`` to use the instance default
        :param blocking: if ``False``, raise :class:`~filelock.Timeout` immediately; ``None`` uses the instance default

        """
        if timeout is None:
            timeout = self.timeout
        if blocking is None:
            blocking = self.blocking
        self.acquire_write(timeout, blocking=blocking)
        try:
            yield
        finally:
            self.release()

    def close(self) -> None:
        """
        Release the lock (if held) and close the underlying SQLite connection.

        After calling this method, the lock instance is no longer usable.

        """
        self.release(force=True)
        self._con.close()
        with _all_connections_lock:
            _all_connections.discard(self._con)
|
venv/lib/python3.10/site-packages/filelock/_soft.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import socket
|
| 5 |
+
import sys
|
| 6 |
+
import time
|
| 7 |
+
from contextlib import suppress
|
| 8 |
+
from errno import EACCES, EEXIST, EPERM, ESRCH
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from ._api import BaseFileLock
|
| 12 |
+
from ._util import ensure_directory_exists, raise_on_not_writable_file
|
| 13 |
+
|
| 14 |
+
_WIN_SYNCHRONIZE = 0x100000
|
| 15 |
+
_WIN_ERROR_INVALID_PARAMETER = 87
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class SoftFileLock(BaseFileLock):
    """
    Portable file lock based on file existence.

    Unlike :class:`UnixFileLock <filelock.UnixFileLock>` and :class:`WindowsFileLock <filelock.WindowsFileLock>`, this
    lock does not use OS-level locking primitives. Instead, it creates the lock file with ``O_CREAT | O_EXCL`` and
    treats its existence as the lock indicator. This makes it work on any filesystem but leaves stale lock files behind
    if the process crashes without releasing the lock.

    To mitigate stale locks, the lock file contains the PID and hostname of the holding process. On contention, if the
    holder is on the same host and its PID no longer exists, the stale lock is broken automatically.

    """

    def _acquire(self) -> None:
        """Try to create the lock file exclusively; on EEXIST, attempt to break a stale lock instead."""
        raise_on_not_writable_file(self.lock_file)
        ensure_directory_exists(self.lock_file)
        flags = (
            os.O_WRONLY  # open for writing only
            | os.O_CREAT
            | os.O_EXCL  # together with above raise EEXIST if the file specified by filename exists
            | os.O_TRUNC  # truncate the file to zero byte
        )
        if (o_nofollow := getattr(os, "O_NOFOLLOW", None)) is not None:
            flags |= o_nofollow  # refuse to open through a symlink (guards against symlink attacks)
        try:
            file_handler = os.open(self.lock_file, flags, self._open_mode())
        except OSError as exception:
            if not (
                exception.errno == EEXIST or (exception.errno == EACCES and sys.platform == "win32")
            ):  # pragma: win32 no cover
                raise
            if exception.errno == EEXIST and sys.platform != "win32":  # pragma: win32 no cover
                self._try_break_stale_lock()
            # Acquisition failed this round; the caller's retry loop will call _acquire again.
        else:
            self._write_lock_info(file_handler)
            self._context.lock_file_fd = file_handler

    def _try_break_stale_lock(self) -> None:
        """Remove the lock file if its recorded holder is a dead process on this host; best effort only."""
        with suppress(OSError):
            content = Path(self.lock_file).read_text(encoding="utf-8")
            lines = content.strip().splitlines()
            if len(lines) != 2:  # noqa: PLR2004
                return  # unknown format — written by an older/foreign locker, do not touch
            pid_str, hostname = lines
            if hostname != socket.gethostname():
                return  # holder lives on another machine; its liveness cannot be checked here
            pid = int(pid_str)
            if self._is_process_alive(pid):
                return
            # Rename before unlink so a concurrent breaker cannot delete a lock file that a
            # third process re-created between our liveness check and the removal.
            break_path = f"{self.lock_file}.break.{os.getpid()}"
            Path(self.lock_file).rename(break_path)
            Path(break_path).unlink()

    @staticmethod
    def _is_process_alive(pid: int) -> bool:
        """
        Return ``True`` if a process with *pid* appears to exist on this host.

        Errs on the side of ``True`` (e.g. on EPERM), so a live lock is never broken by mistake.

        :param pid: process id recorded in the lock file

        """
        if sys.platform == "win32":  # pragma: win32 cover
            import ctypes  # noqa: PLC0415

            kernel32 = ctypes.windll.kernel32
            handle = kernel32.OpenProcess(_WIN_SYNCHRONIZE, 0, pid)
            if handle:
                kernel32.CloseHandle(handle)
                return True
            # OpenProcess failed: invalid-parameter means no such pid; any other error means
            # the process exists but is inaccessible, so treat it as alive.
            return kernel32.GetLastError() != _WIN_ERROR_INVALID_PARAMETER
        try:
            os.kill(pid, 0)  # signal 0 probes existence without delivering anything
        except OSError as exc:
            if exc.errno == ESRCH:  # no such process
                return False
            if exc.errno == EPERM:  # exists, but owned by another user
                return True
            raise
        return True

    @staticmethod
    def _write_lock_info(fd: int) -> None:
        """Record ``pid\\nhostname`` in the lock file so other processes can detect staleness; best effort."""
        with suppress(OSError):
            os.write(fd, f"{os.getpid()}\n{socket.gethostname()}\n".encode())

    def _release(self) -> None:
        """Close the lock file descriptor and remove the file, tolerating concurrent removal."""
        assert self._context.lock_file_fd is not None  # noqa: S101
        os.close(self._context.lock_file_fd)
        self._context.lock_file_fd = None
        if sys.platform == "win32":
            self._windows_unlink_with_retry()
        else:
            with suppress(OSError):
                Path(self.lock_file).unlink()

    def _windows_unlink_with_retry(self) -> None:
        """Unlink the lock file with exponential backoff; Windows may briefly report EACCES after close."""
        max_retries = 10
        retry_delay = 0.001
        for attempt in range(max_retries):
            # Windows doesn't immediately release file handles after close, causing EACCES/EPERM on unlink
            try:
                Path(self.lock_file).unlink()
            except OSError as exc:  # noqa: PERF203
                if exc.errno not in {EACCES, EPERM}:
                    return  # e.g. already gone (ENOENT) — nothing left to do
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)
                    retry_delay *= 2
                else:
                    return  # retries exhausted; leave the stale file behind (best effort)
            else:
                # Bug fix: previously the loop had no success exit and re-attempted the unlink,
                # relying on the subsequent FileNotFoundError to terminate. Stop once it worked.
                return


__all__ = [
    "SoftFileLock",
]
|
venv/lib/python3.10/site-packages/filelock/_unix.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
import warnings
|
| 6 |
+
from contextlib import suppress
|
| 7 |
+
from errno import EAGAIN, ENOSYS, EWOULDBLOCK
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import cast
|
| 10 |
+
|
| 11 |
+
from ._api import BaseFileLock
|
| 12 |
+
from ._util import ensure_directory_exists
|
| 13 |
+
|
| 14 |
+
#: a flag to indicate if the fcntl API is available
has_fcntl = False
if sys.platform == "win32":  # pragma: win32 cover

    class UnixFileLock(BaseFileLock):
        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""

        # Stub on Windows: fcntl does not exist there, so acquisition is unsupported.
        def _acquire(self) -> None:
            raise NotImplementedError

        def _release(self) -> None:
            raise NotImplementedError

else:  # pragma: win32 no cover
    try:
        import fcntl

        # Probe the exact attributes used below; some exotic builds ship a partial fcntl module.
        _ = (fcntl.flock, fcntl.LOCK_EX, fcntl.LOCK_NB, fcntl.LOCK_UN)
    except (ImportError, AttributeError):
        pass
    else:
        has_fcntl = True

    class UnixFileLock(BaseFileLock):
        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""

        def _acquire(self) -> None:  # noqa: C901, PLR0912
            # Returning without setting lock_file_fd signals "not acquired"; the caller's retry loop
            # in BaseFileLock will invoke _acquire again. TODO confirm against ._api retry semantics.
            ensure_directory_exists(self.lock_file)
            open_flags = os.O_RDWR | os.O_TRUNC
            o_nofollow = getattr(os, "O_NOFOLLOW", None)
            if o_nofollow is not None:
                # Refuse to open through a symlink where the platform supports it.
                open_flags |= o_nofollow
            open_flags |= os.O_CREAT
            open_mode = self._open_mode()
            try:
                fd = os.open(self.lock_file, open_flags, open_mode)
            except FileNotFoundError:
                # On FUSE/NFS, os.open(O_CREAT) is not atomic: LOOKUP + CREATE can be split, allowing a concurrent
                # unlink() to delete the file between them. For valid paths, treat ENOENT as transient contention.
                # For invalid paths (e.g., empty string), re-raise to avoid infinite retry loops.
                if self.lock_file and Path(self.lock_file).parent.exists():
                    return
                raise
            except PermissionError:
                # Sticky-bit dirs (e.g. /tmp): O_CREAT fails if the file is owned by another user (#317).
                # Fall back to opening the existing file without O_CREAT.
                if not Path(self.lock_file).exists():
                    raise
                try:
                    fd = os.open(self.lock_file, open_flags & ~os.O_CREAT, open_mode)
                except FileNotFoundError:
                    # File vanished between the exists() check and the open — treat as contention.
                    return
            if self.has_explicit_mode:
                # Honor an explicitly requested mode even when umask or a pre-existing file changed it.
                with suppress(PermissionError):
                    os.fchmod(fd, self._context.mode)
            try:
                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except OSError as exception:
                os.close(fd)
                if exception.errno == ENOSYS:
                    # Filesystem does not implement flock at all: switch this instance to SoftFileLock
                    # and retry the acquisition through the new class.
                    with suppress(OSError):
                        Path(self.lock_file).unlink()
                    self._fallback_to_soft_lock()
                    self._acquire()
                    return
                if exception.errno not in {EAGAIN, EWOULDBLOCK}:
                    raise
            else:
                # The file may have been unlinked by a concurrent _release() between our open() and flock().
                # A lock on an unlinked inode is useless — discard and let the retry loop start fresh.
                if os.fstat(fd).st_nlink == 0:
                    os.close(fd)
                else:
                    self._context.lock_file_fd = fd

        def _fallback_to_soft_lock(self) -> None:
            # Mutates __class__ in place so the caller's existing reference keeps working,
            # preserving the async variant when the instance is an async lock.
            from ._soft import SoftFileLock  # noqa: PLC0415

            warnings.warn("flock not supported on this filesystem, falling back to SoftFileLock", stacklevel=2)
            from .asyncio import AsyncSoftFileLock, BaseAsyncFileLock  # noqa: PLC0415

            self.__class__ = AsyncSoftFileLock if isinstance(self, BaseAsyncFileLock) else SoftFileLock

        def _release(self) -> None:
            fd = cast("int", self._context.lock_file_fd)
            self._context.lock_file_fd = None
            # Unlink before unlocking: pairs with the st_nlink == 0 check in _acquire so a waiter
            # that grabs the flock on the now-unlinked inode discards it and retries on a fresh file.
            with suppress(OSError):
                Path(self.lock_file).unlink()
            fcntl.flock(fd, fcntl.LOCK_UN)
            os.close(fd)


__all__ = [
    "UnixFileLock",
    "has_fcntl",
]
|
venv/lib/python3.10/site-packages/filelock/_util.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import stat
|
| 5 |
+
import sys
|
| 6 |
+
from errno import EACCES, EISDIR
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def raise_on_not_writable_file(filename: str) -> None:
    """
    Raise an exception if attempting to open the file for writing would fail.

    This is done so files that will never be writable can be separated from files that are writable but currently
    locked.

    :param filename: file to check

    :raises OSError: as if the file was opened for writing.

    """
    try:
        # A single stat call checks existence and writability together, avoiding a TOCTOU race.
        info = os.stat(filename)  # noqa: PTH116
    except OSError:
        return  # missing file (or any other stat failure): nothing to report

    # A zero mtime indicates a bogus stat result on some platforms — treat it as "no information".
    if info.st_mtime != 0 and not info.st_mode & stat.S_IWUSR:
        raise PermissionError(EACCES, "Permission denied", filename)

    if stat.S_ISDIR(info.st_mode):
        if sys.platform == "win32":  # pragma: win32 cover
            # Windows reports opening a directory for writing as a permission problem.
            raise PermissionError(EACCES, "Permission denied", filename)
        # POSIX reports it as IsADirectoryError instead.  # pragma: win32 no cover
        raise IsADirectoryError(EISDIR, "Is a directory", filename)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def ensure_directory_exists(filename: Path | str) -> None:
|
| 41 |
+
"""
|
| 42 |
+
Ensure the directory containing the file exists (create it if necessary).
|
| 43 |
+
|
| 44 |
+
:param filename: file.
|
| 45 |
+
|
| 46 |
+
"""
|
| 47 |
+
Path(filename).parent.mkdir(parents=True, exist_ok=True)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# Public API of this module.
__all__ = [
    "ensure_directory_exists",
    "raise_on_not_writable_file",
]
|
venv/lib/python3.10/site-packages/filelock/_windows.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
from errno import EACCES
|
| 6 |
+
from typing import cast
|
| 7 |
+
|
| 8 |
+
from ._api import BaseFileLock
|
| 9 |
+
from ._util import ensure_directory_exists, raise_on_not_writable_file
|
| 10 |
+
|
| 11 |
+
if sys.platform == "win32": # pragma: win32 cover
|
| 12 |
+
import ctypes
|
| 13 |
+
import msvcrt
|
| 14 |
+
from ctypes import wintypes
|
| 15 |
+
|
| 16 |
+
# Windows API constants for reparse point detection
|
| 17 |
+
FILE_ATTRIBUTE_REPARSE_POINT = 0x00000400
|
| 18 |
+
INVALID_FILE_ATTRIBUTES = 0xFFFFFFFF
|
| 19 |
+
|
| 20 |
+
# Load kernel32.dll
|
| 21 |
+
_kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
|
| 22 |
+
_kernel32.GetFileAttributesW.argtypes = [wintypes.LPCWSTR]
|
| 23 |
+
_kernel32.GetFileAttributesW.restype = wintypes.DWORD
|
| 24 |
+
|
| 25 |
+
def _is_reparse_point(path: str) -> bool:
|
| 26 |
+
"""
|
| 27 |
+
Check if a path is a reparse point (symlink, junction, etc.) on Windows.
|
| 28 |
+
|
| 29 |
+
:param path: Path to check
|
| 30 |
+
|
| 31 |
+
:returns: True if path is a reparse point, False otherwise
|
| 32 |
+
|
| 33 |
+
:raises OSError: If GetFileAttributesW fails for reasons other than file-not-found
|
| 34 |
+
|
| 35 |
+
"""
|
| 36 |
+
attrs = _kernel32.GetFileAttributesW(path)
|
| 37 |
+
if attrs == INVALID_FILE_ATTRIBUTES:
|
| 38 |
+
# File doesn't exist yet - that's fine, we'll create it
|
| 39 |
+
err = ctypes.get_last_error()
|
| 40 |
+
if err == 2: # noqa: PLR2004 # ERROR_FILE_NOT_FOUND
|
| 41 |
+
return False
|
| 42 |
+
if err == 3: # noqa: PLR2004 # ERROR_PATH_NOT_FOUND
|
| 43 |
+
return False
|
| 44 |
+
# Some other error - let caller handle it
|
| 45 |
+
return False
|
| 46 |
+
return bool(attrs & FILE_ATTRIBUTE_REPARSE_POINT)
|
| 47 |
+
|
| 48 |
+
class WindowsFileLock(BaseFileLock):
    """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""

    def _acquire(self) -> None:
        """Make one non-blocking attempt to take the lock; on success store the fd on the context."""
        raise_on_not_writable_file(self.lock_file)
        ensure_directory_exists(self.lock_file)

        # Security check: Refuse to open reparse points (symlinks, junctions)
        # This prevents TOCTOU symlink attacks (CVE-TBD)
        if _is_reparse_point(self.lock_file):
            msg = f"Lock file is a reparse point (symlink/junction): {self.lock_file}"
            raise OSError(msg)

        flags = (
            os.O_RDWR  # open for read and write
            | os.O_CREAT  # create file if not exists
        )
        try:
            fd = os.open(self.lock_file, flags, self._open_mode())
        except OSError as exception:
            if exception.errno != EACCES:  # has no access to this lock
                raise
        else:
            try:
                # LK_NBLCK: non-blocking exclusive lock on one byte; EACCES here
                # means another process already holds the lock.
                msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
            except OSError as exception:
                os.close(fd)  # close file first
                if exception.errno != EACCES:  # file is already locked
                    raise
            else:
                # Only a successfully locked fd marks the lock as held.
                self._context.lock_file_fd = fd

    def _release(self) -> None:
        """Unlock the byte region and close the file descriptor; clears the fd before unlocking."""
        fd = cast("int", self._context.lock_file_fd)
        self._context.lock_file_fd = None
        msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
        os.close(fd)
|
| 85 |
+
|
| 86 |
+
else:  # pragma: win32 no cover

    class WindowsFileLock(BaseFileLock):
        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""

        def _acquire(self) -> None:
            # Placeholder on non-Windows platforms: importable, never usable.
            raise NotImplementedError

        def _release(self) -> None:
            # Placeholder on non-Windows platforms: importable, never usable.
            raise NotImplementedError


__all__ = [
    "WindowsFileLock",
]
|
venv/lib/python3.10/site-packages/filelock/asyncio.py
ADDED
|
@@ -0,0 +1,376 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""An asyncio-based implementation of the file lock."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import asyncio
|
| 6 |
+
import contextlib
|
| 7 |
+
import logging
|
| 8 |
+
import os
|
| 9 |
+
import time
|
| 10 |
+
from dataclasses import dataclass
|
| 11 |
+
from inspect import iscoroutinefunction
|
| 12 |
+
from threading import local
|
| 13 |
+
from typing import TYPE_CHECKING, Any, NoReturn, cast
|
| 14 |
+
|
| 15 |
+
from ._api import _UNSET_FILE_MODE, BaseFileLock, FileLockContext, FileLockMeta
|
| 16 |
+
from ._error import Timeout
|
| 17 |
+
from ._soft import SoftFileLock
|
| 18 |
+
from ._unix import UnixFileLock
|
| 19 |
+
from ._windows import WindowsFileLock
|
| 20 |
+
|
| 21 |
+
if TYPE_CHECKING:
|
| 22 |
+
import sys
|
| 23 |
+
from collections.abc import Callable
|
| 24 |
+
from concurrent import futures
|
| 25 |
+
from types import TracebackType
|
| 26 |
+
|
| 27 |
+
if sys.version_info >= (3, 11): # pragma: no cover (py311+)
|
| 28 |
+
from typing import Self
|
| 29 |
+
else: # pragma: no cover (<py311)
|
| 30 |
+
from typing_extensions import Self
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
_LOGGER = logging.getLogger("filelock")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass
class AsyncFileLockContext(FileLockContext):
    """A dataclass which holds the context for a ``BaseAsyncFileLock`` object."""

    #: Whether the (synchronous) lock primitives run in an executor
    run_in_executor: bool = True

    #: The executor used when ``run_in_executor`` is true; ``None`` means the loop's default
    executor: futures.Executor | None = None

    #: The event loop to use; ``None`` means the running loop is looked up on demand
    loop: asyncio.AbstractEventLoop | None = None


class AsyncThreadLocalFileContext(AsyncFileLockContext, local):
    """A thread local version of the ``FileLockContext`` class."""
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class AsyncAcquireReturnProxy:
    """Async context wrapper returned by ``acquire``: entering yields the lock, exiting releases it."""

    def __init__(self, lock: BaseAsyncFileLock) -> None:  # noqa: D107
        self.lock = lock

    async def __aenter__(self) -> BaseAsyncFileLock:  # noqa: D105
        return self.lock

    async def __aexit__(self, *exc_info: object) -> None:  # noqa: D105
        # Exception details are irrelevant here; the lock is released either way.
        await self.lock.release()
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class AsyncFileLockMeta(FileLockMeta):
    """Metaclass that validates async-specific constructor arguments before instantiation."""

    def __call__(  # ty: ignore[invalid-method-override] # noqa: PLR0913
        cls,  # noqa: N805
        lock_file: str | os.PathLike[str],
        timeout: float = -1,
        mode: int = _UNSET_FILE_MODE,
        thread_local: bool = False,  # noqa: FBT001, FBT002
        *,
        blocking: bool = True,
        is_singleton: bool = False,
        poll_interval: float = 0.05,
        lifetime: float | None = None,
        loop: asyncio.AbstractEventLoop | None = None,
        run_in_executor: bool = True,
        executor: futures.Executor | None = None,
    ) -> BaseAsyncFileLock:
        # Executor threads have their own thread-local storage, so a
        # thread-local context would be invisible to them; reject the combo.
        if thread_local and run_in_executor:
            msg = "run_in_executor is not supported when thread_local is True"
            raise ValueError(msg)
        instance = super().__call__(
            lock_file=lock_file,
            timeout=timeout,
            mode=mode,
            thread_local=thread_local,
            blocking=blocking,
            is_singleton=is_singleton,
            poll_interval=poll_interval,
            lifetime=lifetime,
            loop=loop,
            run_in_executor=run_in_executor,
            executor=executor,
        )
        return cast("BaseAsyncFileLock", instance)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class BaseAsyncFileLock(BaseFileLock, metaclass=AsyncFileLockMeta):
    """
    Base class for asynchronous file locks.

    .. versionadded:: 3.15.0

    """

    def __init__(  # noqa: PLR0913
        self,
        lock_file: str | os.PathLike[str],
        timeout: float = -1,
        mode: int = _UNSET_FILE_MODE,
        thread_local: bool = False,  # noqa: FBT001, FBT002
        *,
        blocking: bool = True,
        is_singleton: bool = False,
        poll_interval: float = 0.05,
        lifetime: float | None = None,
        loop: asyncio.AbstractEventLoop | None = None,
        run_in_executor: bool = True,
        executor: futures.Executor | None = None,
    ) -> None:
        """
        Create a new lock object.

        :param lock_file: path to the file
        :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in the
            acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it to a
            negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
        :param mode: file permissions for the lockfile. When not specified, the OS controls permissions via umask and
            default ACLs, preserving POSIX default ACL inheritance in shared directories.
        :param thread_local: Whether this object's internal context should be thread local or not. If this is set to
            ``False`` then the lock will be reentrant across threads.
        :param blocking: whether the lock should be blocking or not
        :param is_singleton: If this is set to ``True`` then only one instance of this class will be created per lock
            file. This is useful if you want to use the lock object for reentrant locking without needing to pass the
            same object around.
        :param poll_interval: default interval for polling the lock file, in seconds. It will be used as fallback value
            in the acquire method, if no poll_interval value (``None``) is given.
        :param lifetime: maximum time in seconds a lock can be held before it is considered expired. When set, a waiting
            process will break a lock whose file modification time is older than ``lifetime`` seconds. ``None`` (the
            default) means locks never expire.
        :param loop: The event loop to use. If not specified, the running event loop will be used.
        :param run_in_executor: If this is set to ``True`` then the lock will be acquired in an executor.
        :param executor: The executor to use. If not specified, the default executor will be used.

        """
        self._is_thread_local = thread_local
        self._is_singleton = is_singleton

        # Create the context. Note that external code should not work with the context directly and should instead use
        # properties of this class.
        kwargs: dict[str, Any] = {
            "lock_file": os.fspath(lock_file),
            "timeout": timeout,
            "mode": mode,
            "blocking": blocking,
            "poll_interval": poll_interval,
            "lifetime": lifetime,
            "loop": loop,
            "run_in_executor": run_in_executor,
            "executor": executor,
        }
        self._context: AsyncFileLockContext = (AsyncThreadLocalFileContext if thread_local else AsyncFileLockContext)(
            **kwargs
        )

    @property
    def run_in_executor(self) -> bool:
        """:returns: whether run in executor."""
        return self._context.run_in_executor

    @property
    def executor(self) -> futures.Executor | None:
        """:returns: the executor."""
        return self._context.executor

    @executor.setter
    def executor(self, value: futures.Executor | None) -> None:  # pragma: no cover
        """
        Change the executor.

        :param futures.Executor | None value: the new executor or ``None``

        """
        self._context.executor = value

    @property
    def loop(self) -> asyncio.AbstractEventLoop | None:
        """:returns: the event loop."""
        return self._context.loop

    async def acquire(  # ty: ignore[invalid-method-override]
        self,
        timeout: float | None = None,
        poll_interval: float | None = None,
        *,
        blocking: bool | None = None,
        cancel_check: Callable[[], bool] | None = None,
    ) -> AsyncAcquireReturnProxy:
        """
        Try to acquire the file lock.

        :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default
            :attr:`~BaseFileLock.timeout` is and if ``timeout < 0``, there is no timeout and this method will block
            until the lock could be acquired
        :param poll_interval: interval of trying to acquire the lock file, ``None`` means use the default
            :attr:`~BaseFileLock.poll_interval`
        :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the
            first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
        :param cancel_check: a callable returning ``True`` when the acquisition should be canceled. Checked on each poll
            iteration. When triggered, raises :class:`~Timeout` just like an expired timeout.

        :returns: a context object that will unlock the file when the context is exited

        :raises Timeout: if fails to acquire lock within the timeout period

        .. code-block:: python

            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass

            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()

        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self._context.timeout

        if blocking is None:
            blocking = self._context.blocking

        if poll_interval is None:
            poll_interval = self._context.poll_interval

        # Increment the number right at the beginning. We can still undo it, if something fails.
        self._context.lock_counter += 1

        lock_id = id(self)
        lock_filename = self.lock_file
        start_time = time.perf_counter()
        try:
            while True:
                if not self.is_locked:
                    self._try_break_expired_lock()
                    _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
                    await self._run_internal_method(self._acquire)
                if self.is_locked:
                    _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
                    break
                if self._check_give_up(
                    lock_id,
                    lock_filename,
                    blocking=blocking,
                    cancel_check=cancel_check,
                    timeout=timeout,
                    start_time=start_time,
                ):
                    raise Timeout(lock_filename)  # noqa: TRY301
                msg = "Lock %s not acquired on %s, waiting %s seconds ..."
                _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
                await asyncio.sleep(poll_interval)
        except BaseException:  # Something did go wrong, so decrement the counter.
            self._context.lock_counter = max(0, self._context.lock_counter - 1)
            raise
        return AsyncAcquireReturnProxy(lock=self)

    async def release(self, force: bool = False) -> None:  # ty: ignore[invalid-method-override] # noqa: FBT001, FBT002
        """
        Release the file lock. The lock is only completely released when the lock counter reaches 0. The lock file
        itself is not automatically deleted.

        :param force: If true, the lock counter is ignored and the lock is released in every case.

        """
        if self.is_locked:
            self._context.lock_counter -= 1

            if self._context.lock_counter == 0 or force:
                lock_id, lock_filename = id(self), self.lock_file

                _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
                await self._run_internal_method(self._release)
                self._context.lock_counter = 0
                _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)

    async def _run_internal_method(self, method: Callable[[], Any]) -> None:
        """Run *method* appropriately: await coroutines, offload sync calls to the executor, or call inline."""
        if iscoroutinefunction(method):
            await method()
        elif self.run_in_executor:
            loop = self.loop or asyncio.get_running_loop()
            await loop.run_in_executor(self.executor, method)
        else:
            method()

    def __enter__(self) -> NoReturn:
        """
        Replace old __enter__ method to avoid using it.

        NOTE: DO NOT USE `with` FOR ASYNCIO LOCKS, USE `async with` INSTEAD.

        :returns: none
        :rtype: NoReturn

        """
        msg = "Do not use `with` for asyncio locks, use `async with` instead."
        raise NotImplementedError(msg)

    async def __aenter__(self) -> Self:
        """
        Acquire the lock.

        :returns: the lock object

        """
        await self.acquire()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        """
        Release the lock.

        :param exc_type: the exception type if raised
        :param exc_value: the exception value if raised
        :param traceback: the exception traceback if raised

        """
        await self.release()

    def __del__(self) -> None:
        """Called when the lock object is deleted."""
        # No running loop (RuntimeError from get_running_loop) means there is
        # nothing we can safely do during interpreter teardown - suppress it.
        with contextlib.suppress(RuntimeError):
            loop = self.loop or asyncio.get_running_loop()
            if not loop.is_running():  # pragma: no cover
                loop.run_until_complete(self.release(force=True))
            else:
                # Cannot await in __del__; schedule the forced release instead.
                loop.create_task(self.release(force=True))
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
class AsyncSoftFileLock(SoftFileLock, BaseAsyncFileLock):
    """Simply watches the existence of the lock file."""


class AsyncUnixFileLock(UnixFileLock, BaseAsyncFileLock):
    """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""


class AsyncWindowsFileLock(WindowsFileLock, BaseAsyncFileLock):
    """Uses the :func:`msvcrt.locking` to hard lock the lock file on windows systems."""


__all__ = [
    "AsyncAcquireReturnProxy",
    "AsyncSoftFileLock",
    "AsyncUnixFileLock",
    "AsyncWindowsFileLock",
    "BaseAsyncFileLock",
]
|
venv/lib/python3.10/site-packages/filelock/py.typed
ADDED
|
File without changes
|
venv/lib/python3.10/site-packages/filelock/version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# NOTE: this is a plain False constant (not typing.TYPE_CHECKING), so the
# typing imports below never execute at runtime - only under static analysis.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '3.24.3'
__version_tuple__ = version_tuple = (3, 24, 3)

__commit_id__ = commit_id = None
|
venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/METADATA
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: fsspec
|
| 3 |
+
Version: 2026.2.0
|
| 4 |
+
Summary: File-system specification
|
| 5 |
+
Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
|
| 6 |
+
Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
|
| 7 |
+
Project-URL: Homepage, https://github.com/fsspec/filesystem_spec
|
| 8 |
+
Maintainer-email: Martin Durant <mdurant@anaconda.com>
|
| 9 |
+
License-Expression: BSD-3-Clause
|
| 10 |
+
License-File: LICENSE
|
| 11 |
+
Keywords: file
|
| 12 |
+
Classifier: Development Status :: 4 - Beta
|
| 13 |
+
Classifier: Intended Audience :: Developers
|
| 14 |
+
Classifier: Operating System :: OS Independent
|
| 15 |
+
Classifier: Programming Language :: Python :: 3.10
|
| 16 |
+
Classifier: Programming Language :: Python :: 3.11
|
| 17 |
+
Classifier: Programming Language :: Python :: 3.12
|
| 18 |
+
Classifier: Programming Language :: Python :: 3.13
|
| 19 |
+
Classifier: Programming Language :: Python :: 3.14
|
| 20 |
+
Requires-Python: >=3.10
|
| 21 |
+
Provides-Extra: abfs
|
| 22 |
+
Requires-Dist: adlfs; extra == 'abfs'
|
| 23 |
+
Provides-Extra: adl
|
| 24 |
+
Requires-Dist: adlfs; extra == 'adl'
|
| 25 |
+
Provides-Extra: arrow
|
| 26 |
+
Requires-Dist: pyarrow>=1; extra == 'arrow'
|
| 27 |
+
Provides-Extra: dask
|
| 28 |
+
Requires-Dist: dask; extra == 'dask'
|
| 29 |
+
Requires-Dist: distributed; extra == 'dask'
|
| 30 |
+
Provides-Extra: dev
|
| 31 |
+
Requires-Dist: pre-commit; extra == 'dev'
|
| 32 |
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
| 33 |
+
Provides-Extra: doc
|
| 34 |
+
Requires-Dist: numpydoc; extra == 'doc'
|
| 35 |
+
Requires-Dist: sphinx; extra == 'doc'
|
| 36 |
+
Requires-Dist: sphinx-design; extra == 'doc'
|
| 37 |
+
Requires-Dist: sphinx-rtd-theme; extra == 'doc'
|
| 38 |
+
Requires-Dist: yarl; extra == 'doc'
|
| 39 |
+
Provides-Extra: dropbox
|
| 40 |
+
Requires-Dist: dropbox; extra == 'dropbox'
|
| 41 |
+
Requires-Dist: dropboxdrivefs; extra == 'dropbox'
|
| 42 |
+
Requires-Dist: requests; extra == 'dropbox'
|
| 43 |
+
Provides-Extra: entrypoints
|
| 44 |
+
Provides-Extra: full
|
| 45 |
+
Requires-Dist: adlfs; extra == 'full'
|
| 46 |
+
Requires-Dist: aiohttp!=4.0.0a0,!=4.0.0a1; extra == 'full'
|
| 47 |
+
Requires-Dist: dask; extra == 'full'
|
| 48 |
+
Requires-Dist: distributed; extra == 'full'
|
| 49 |
+
Requires-Dist: dropbox; extra == 'full'
|
| 50 |
+
Requires-Dist: dropboxdrivefs; extra == 'full'
|
| 51 |
+
Requires-Dist: fusepy; extra == 'full'
|
| 52 |
+
Requires-Dist: gcsfs>2024.2.0; extra == 'full'
|
| 53 |
+
Requires-Dist: libarchive-c; extra == 'full'
|
| 54 |
+
Requires-Dist: ocifs; extra == 'full'
|
| 55 |
+
Requires-Dist: panel; extra == 'full'
|
| 56 |
+
Requires-Dist: paramiko; extra == 'full'
|
| 57 |
+
Requires-Dist: pyarrow>=1; extra == 'full'
|
| 58 |
+
Requires-Dist: pygit2; extra == 'full'
|
| 59 |
+
Requires-Dist: requests; extra == 'full'
|
| 60 |
+
Requires-Dist: s3fs>2024.2.0; extra == 'full'
|
| 61 |
+
Requires-Dist: smbprotocol; extra == 'full'
|
| 62 |
+
Requires-Dist: tqdm; extra == 'full'
|
| 63 |
+
Provides-Extra: fuse
|
| 64 |
+
Requires-Dist: fusepy; extra == 'fuse'
|
| 65 |
+
Provides-Extra: gcs
|
| 66 |
+
Requires-Dist: gcsfs>2024.2.0; extra == 'gcs'
|
| 67 |
+
Provides-Extra: git
|
| 68 |
+
Requires-Dist: pygit2; extra == 'git'
|
| 69 |
+
Provides-Extra: github
|
| 70 |
+
Requires-Dist: requests; extra == 'github'
|
| 71 |
+
Provides-Extra: gs
|
| 72 |
+
Requires-Dist: gcsfs; extra == 'gs'
|
| 73 |
+
Provides-Extra: gui
|
| 74 |
+
Requires-Dist: panel; extra == 'gui'
|
| 75 |
+
Provides-Extra: hdfs
|
| 76 |
+
Requires-Dist: pyarrow>=1; extra == 'hdfs'
|
| 77 |
+
Provides-Extra: http
|
| 78 |
+
Requires-Dist: aiohttp!=4.0.0a0,!=4.0.0a1; extra == 'http'
|
| 79 |
+
Provides-Extra: libarchive
|
| 80 |
+
Requires-Dist: libarchive-c; extra == 'libarchive'
|
| 81 |
+
Provides-Extra: oci
|
| 82 |
+
Requires-Dist: ocifs; extra == 'oci'
|
| 83 |
+
Provides-Extra: s3
|
| 84 |
+
Requires-Dist: s3fs>2024.2.0; extra == 's3'
|
| 85 |
+
Provides-Extra: sftp
|
| 86 |
+
Requires-Dist: paramiko; extra == 'sftp'
|
| 87 |
+
Provides-Extra: smb
|
| 88 |
+
Requires-Dist: smbprotocol; extra == 'smb'
|
| 89 |
+
Provides-Extra: ssh
|
| 90 |
+
Requires-Dist: paramiko; extra == 'ssh'
|
| 91 |
+
Provides-Extra: test
|
| 92 |
+
Requires-Dist: aiohttp!=4.0.0a0,!=4.0.0a1; extra == 'test'
|
| 93 |
+
Requires-Dist: numpy; extra == 'test'
|
| 94 |
+
Requires-Dist: pytest; extra == 'test'
|
| 95 |
+
Requires-Dist: pytest-asyncio!=0.22.0; extra == 'test'
|
| 96 |
+
Requires-Dist: pytest-benchmark; extra == 'test'
|
| 97 |
+
Requires-Dist: pytest-cov; extra == 'test'
|
| 98 |
+
Requires-Dist: pytest-mock; extra == 'test'
|
| 99 |
+
Requires-Dist: pytest-recording; extra == 'test'
|
| 100 |
+
Requires-Dist: pytest-rerunfailures; extra == 'test'
|
| 101 |
+
Requires-Dist: requests; extra == 'test'
|
| 102 |
+
Provides-Extra: test-downstream
|
| 103 |
+
Requires-Dist: aiobotocore<3.0.0,>=2.5.4; extra == 'test-downstream'
|
| 104 |
+
Requires-Dist: dask[dataframe,test]; extra == 'test-downstream'
|
| 105 |
+
Requires-Dist: moto[server]<5,>4; extra == 'test-downstream'
|
| 106 |
+
Requires-Dist: pytest-timeout; extra == 'test-downstream'
|
| 107 |
+
Requires-Dist: xarray; extra == 'test-downstream'
|
| 108 |
+
Provides-Extra: test-full
|
| 109 |
+
Requires-Dist: adlfs; extra == 'test-full'
|
| 110 |
+
Requires-Dist: aiohttp!=4.0.0a0,!=4.0.0a1; extra == 'test-full'
|
| 111 |
+
Requires-Dist: backports-zstd; (python_version < '3.14') and extra == 'test-full'
|
| 112 |
+
Requires-Dist: cloudpickle; extra == 'test-full'
|
| 113 |
+
Requires-Dist: dask; extra == 'test-full'
|
| 114 |
+
Requires-Dist: distributed; extra == 'test-full'
|
| 115 |
+
Requires-Dist: dropbox; extra == 'test-full'
|
| 116 |
+
Requires-Dist: dropboxdrivefs; extra == 'test-full'
|
| 117 |
+
Requires-Dist: fastparquet; extra == 'test-full'
|
| 118 |
+
Requires-Dist: fusepy; extra == 'test-full'
|
| 119 |
+
Requires-Dist: gcsfs; extra == 'test-full'
|
| 120 |
+
Requires-Dist: jinja2; extra == 'test-full'
|
| 121 |
+
Requires-Dist: kerchunk; extra == 'test-full'
|
| 122 |
+
Requires-Dist: libarchive-c; extra == 'test-full'
|
| 123 |
+
Requires-Dist: lz4; extra == 'test-full'
|
| 124 |
+
Requires-Dist: notebook; extra == 'test-full'
|
| 125 |
+
Requires-Dist: numpy; extra == 'test-full'
|
| 126 |
+
Requires-Dist: ocifs; extra == 'test-full'
|
| 127 |
+
Requires-Dist: pandas<3.0.0; extra == 'test-full'
|
| 128 |
+
Requires-Dist: panel; extra == 'test-full'
|
| 129 |
+
Requires-Dist: paramiko; extra == 'test-full'
|
| 130 |
+
Requires-Dist: pyarrow; extra == 'test-full'
|
| 131 |
+
Requires-Dist: pyarrow>=1; extra == 'test-full'
|
| 132 |
+
Requires-Dist: pyftpdlib; extra == 'test-full'
|
| 133 |
+
Requires-Dist: pygit2; extra == 'test-full'
|
| 134 |
+
Requires-Dist: pytest; extra == 'test-full'
|
| 135 |
+
Requires-Dist: pytest-asyncio!=0.22.0; extra == 'test-full'
|
| 136 |
+
Requires-Dist: pytest-benchmark; extra == 'test-full'
|
| 137 |
+
Requires-Dist: pytest-cov; extra == 'test-full'
|
| 138 |
+
Requires-Dist: pytest-mock; extra == 'test-full'
|
| 139 |
+
Requires-Dist: pytest-recording; extra == 'test-full'
|
| 140 |
+
Requires-Dist: pytest-rerunfailures; extra == 'test-full'
|
| 141 |
+
Requires-Dist: python-snappy; extra == 'test-full'
|
| 142 |
+
Requires-Dist: requests; extra == 'test-full'
|
| 143 |
+
Requires-Dist: smbprotocol; extra == 'test-full'
|
| 144 |
+
Requires-Dist: tqdm; extra == 'test-full'
|
| 145 |
+
Requires-Dist: urllib3; extra == 'test-full'
|
| 146 |
+
Requires-Dist: zarr; extra == 'test-full'
|
| 147 |
+
Requires-Dist: zstandard; (python_version < '3.14') and extra == 'test-full'
|
| 148 |
+
Provides-Extra: tqdm
|
| 149 |
+
Requires-Dist: tqdm; extra == 'tqdm'
|
| 150 |
+
Description-Content-Type: text/markdown
|
| 151 |
+
|
| 152 |
+
# filesystem_spec
|
| 153 |
+
|
| 154 |
+
[](https://pypi.python.org/pypi/fsspec/)
|
| 155 |
+
[](https://anaconda.org/conda-forge/fsspec)
|
| 156 |
+

|
| 157 |
+
[](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
|
| 158 |
+
|
| 159 |
+
A specification for pythonic filesystems.
|
| 160 |
+
|
| 161 |
+
## Install
|
| 162 |
+
|
| 163 |
+
```bash
|
| 164 |
+
pip install fsspec
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
would install the base fsspec. Various optionally supported features might require specification of custom
|
| 168 |
+
extra require, e.g. `pip install fsspec[ssh]` will install dependencies for `ssh` backends support.
|
| 169 |
+
Use `pip install fsspec[full]` for installation of all known extra dependencies.
|
| 170 |
+
|
| 171 |
+
Up-to-date package also provided through conda-forge distribution:
|
| 172 |
+
|
| 173 |
+
```bash
|
| 174 |
+
conda install -c conda-forge fsspec
|
| 175 |
+
```
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
## Purpose
|
| 179 |
+
|
| 180 |
+
To produce a template or specification for a file-system interface, that specific implementations should follow,
|
| 181 |
+
so that applications making use of them can rely on a common behaviour and not have to worry about the specific
|
| 182 |
+
internal implementation decisions with any given backend. Many such implementations are included in this package,
|
| 183 |
+
or in sister projects such as `s3fs` and `gcsfs`.
|
| 184 |
+
|
| 185 |
+
In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE
|
| 186 |
+
mounting of the file-system implementation may be available for all implementations "for free".
|
| 187 |
+
|
| 188 |
+
## Documentation
|
| 189 |
+
|
| 190 |
+
Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
|
| 191 |
+
|
| 192 |
+
## Develop
|
| 193 |
+
|
| 194 |
+
fsspec uses GitHub Actions for CI. Environment files can be found
|
| 195 |
+
in the "ci/" directory. Note that the main environment is called "py38",
|
| 196 |
+
but it is expected that the version of python installed be adjustable at
|
| 197 |
+
CI runtime. For local use, pick a version suitable for you.
|
| 198 |
+
|
| 199 |
+
```bash
|
| 200 |
+
# For a new environment (mamba / conda).
|
| 201 |
+
mamba create -n fsspec -c conda-forge python=3.10 -y
|
| 202 |
+
conda activate fsspec
|
| 203 |
+
|
| 204 |
+
# Standard dev install with docs and tests.
|
| 205 |
+
pip install -e ".[dev,doc,test]"
|
| 206 |
+
|
| 207 |
+
# Full tests except for downstream
|
| 208 |
+
pip install s3fs
|
| 209 |
+
pip uninstall s3fs
|
| 210 |
+
pip install -e .[dev,doc,test_full]
|
| 211 |
+
pip install s3fs --no-deps
|
| 212 |
+
pytest -v
|
| 213 |
+
|
| 214 |
+
# Downstream tests.
|
| 215 |
+
sh install_s3fs.sh
|
| 216 |
+
# Windows powershell.
|
| 217 |
+
install_s3fs.sh
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
### Testing
|
| 221 |
+
|
| 222 |
+
Tests can be run in the dev environment, if activated, via ``pytest fsspec``.
|
| 223 |
+
|
| 224 |
+
The full fsspec suite requires a system-level docker, docker-compose, and fuse
|
| 225 |
+
installation. If only making changes to one backend implementation, it is
|
| 226 |
+
not generally necessary to run all tests locally.
|
| 227 |
+
|
| 228 |
+
It is expected that contributors ensure that any change to fsspec does not
|
| 229 |
+
cause issues or regressions for either other fsspec-related packages such
|
| 230 |
+
as gcsfs and s3fs, nor for downstream users of fsspec. The "downstream" CI
|
| 231 |
+
run and corresponding environment file run a set of tests from the dask
|
| 232 |
+
test suite, and very minimal tests against pandas and zarr from the
|
| 233 |
+
test_downstream.py module in this repo.
|
| 234 |
+
|
| 235 |
+
### Code Formatting
|
| 236 |
+
|
| 237 |
+
fsspec uses [Black](https://black.readthedocs.io/en/stable) to ensure
|
| 238 |
+
a consistent code format throughout the project.
|
| 239 |
+
Run ``black fsspec`` from the root of the filesystem_spec repository to
|
| 240 |
+
auto-format your code. Additionally, many editors have plugins that will apply
|
| 241 |
+
``black`` as you edit files. ``black`` is included in the ``tox`` environments.
|
| 242 |
+
|
| 243 |
+
Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to
|
| 244 |
+
automatically run ``black`` when you make a git commit.
|
| 245 |
+
Run ``pre-commit install --install-hooks`` from the root of the
|
| 246 |
+
filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
|
| 247 |
+
before you commit, reformatting any changed files. You can format without
|
| 248 |
+
committing via ``pre-commit run`` or skip these checks with ``git commit
|
| 249 |
+
--no-verify``.
|
| 250 |
+
|
| 251 |
+
## Support
|
| 252 |
+
|
| 253 |
+
Work on this repository is supported in part by:
|
| 254 |
+
|
| 255 |
+
"Anaconda, Inc. - Advancing AI through open source."
|
| 256 |
+
|
| 257 |
+
<a href="https://anaconda.com/"><img src="https://camo.githubusercontent.com/b8555ef2222598ed37ce38ac86955febbd25de7619931bb7dd3c58432181d3b6/68747470733a2f2f626565776172652e6f72672f636f6d6d756e6974792f6d656d626572732f616e61636f6e64612f616e61636f6e64612d6c617267652e706e67" alt="anaconda logo" width="40%"/></a>
|
venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/RECORD
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fsspec-2026.2.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
fsspec-2026.2.0.dist-info/METADATA,sha256=Pw6QhbyXeg-elb0hpWPiQRpAeMD_ApJ9vdNFoYCnrPs,10524
|
| 3 |
+
fsspec-2026.2.0.dist-info/RECORD,,
|
| 4 |
+
fsspec-2026.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
| 5 |
+
fsspec-2026.2.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
|
| 6 |
+
fsspec/__init__.py,sha256=L7qwNBU1iMNQd8Of87HYSNFT9gWlNMSESaJC8fY0AaQ,2053
|
| 7 |
+
fsspec/__pycache__/__init__.cpython-310.pyc,,
|
| 8 |
+
fsspec/__pycache__/_version.cpython-310.pyc,,
|
| 9 |
+
fsspec/__pycache__/archive.cpython-310.pyc,,
|
| 10 |
+
fsspec/__pycache__/asyn.cpython-310.pyc,,
|
| 11 |
+
fsspec/__pycache__/caching.cpython-310.pyc,,
|
| 12 |
+
fsspec/__pycache__/callbacks.cpython-310.pyc,,
|
| 13 |
+
fsspec/__pycache__/compression.cpython-310.pyc,,
|
| 14 |
+
fsspec/__pycache__/config.cpython-310.pyc,,
|
| 15 |
+
fsspec/__pycache__/conftest.cpython-310.pyc,,
|
| 16 |
+
fsspec/__pycache__/core.cpython-310.pyc,,
|
| 17 |
+
fsspec/__pycache__/dircache.cpython-310.pyc,,
|
| 18 |
+
fsspec/__pycache__/exceptions.cpython-310.pyc,,
|
| 19 |
+
fsspec/__pycache__/fuse.cpython-310.pyc,,
|
| 20 |
+
fsspec/__pycache__/generic.cpython-310.pyc,,
|
| 21 |
+
fsspec/__pycache__/gui.cpython-310.pyc,,
|
| 22 |
+
fsspec/__pycache__/json.cpython-310.pyc,,
|
| 23 |
+
fsspec/__pycache__/mapping.cpython-310.pyc,,
|
| 24 |
+
fsspec/__pycache__/parquet.cpython-310.pyc,,
|
| 25 |
+
fsspec/__pycache__/registry.cpython-310.pyc,,
|
| 26 |
+
fsspec/__pycache__/spec.cpython-310.pyc,,
|
| 27 |
+
fsspec/__pycache__/transaction.cpython-310.pyc,,
|
| 28 |
+
fsspec/__pycache__/utils.cpython-310.pyc,,
|
| 29 |
+
fsspec/_version.py,sha256=AEamMn8IHx_wGXrogRYVBycv6M5u2_UCmFedn7R8hYI,710
|
| 30 |
+
fsspec/archive.py,sha256=vM6t_lgV6lBWbBYwpm3S4ofBQFQxUPr5KkDQrrQcQro,2411
|
| 31 |
+
fsspec/asyn.py,sha256=LP_OicTWXmKHe31wBoYs2MrrNf8rmlhjVeGg5AqvVy8,36630
|
| 32 |
+
fsspec/caching.py,sha256=8IJ4rgcWnvq_b_DqlcMGJ-K59d4Db5O9Gz8PkATAgHo,34023
|
| 33 |
+
fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
|
| 34 |
+
fsspec/compression.py,sha256=3v_Fe39gzRRWfaeXpzNjAGPqgTzmETYRCo3qHVqD3po,5132
|
| 35 |
+
fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
|
| 36 |
+
fsspec/conftest.py,sha256=uWfm_Qs5alPRxOhRpDfQ0-1jqSJ54pni4y96IxOREXM,3446
|
| 37 |
+
fsspec/core.py,sha256=lc7XSnZU6_C6xljp7Z_xEGN3V7704hbeQLkxvPP0wds,24173
|
| 38 |
+
fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
|
| 39 |
+
fsspec/exceptions.py,sha256=pauSLDMxzTJMOjvX1WEUK0cMyFkrFxpWJsyFywav7A8,331
|
| 40 |
+
fsspec/fuse.py,sha256=Q-3NOOyLqBfYa4Db5E19z_ZY36zzYHtIs1mOUasItBQ,10177
|
| 41 |
+
fsspec/generic.py,sha256=9QHQYMNb-8w8-eYuIqShcTjO_LeHXFoQTyt8J5oEq5Q,13482
|
| 42 |
+
fsspec/gui.py,sha256=CQ7QsrTpaDlWSLNOpwNoJc7khOcYXIZxmrAJN9bHWQU,14002
|
| 43 |
+
fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 44 |
+
fsspec/implementations/__pycache__/__init__.cpython-310.pyc,,
|
| 45 |
+
fsspec/implementations/__pycache__/arrow.cpython-310.pyc,,
|
| 46 |
+
fsspec/implementations/__pycache__/asyn_wrapper.cpython-310.pyc,,
|
| 47 |
+
fsspec/implementations/__pycache__/cache_mapper.cpython-310.pyc,,
|
| 48 |
+
fsspec/implementations/__pycache__/cache_metadata.cpython-310.pyc,,
|
| 49 |
+
fsspec/implementations/__pycache__/cached.cpython-310.pyc,,
|
| 50 |
+
fsspec/implementations/__pycache__/chained.cpython-310.pyc,,
|
| 51 |
+
fsspec/implementations/__pycache__/dask.cpython-310.pyc,,
|
| 52 |
+
fsspec/implementations/__pycache__/data.cpython-310.pyc,,
|
| 53 |
+
fsspec/implementations/__pycache__/dbfs.cpython-310.pyc,,
|
| 54 |
+
fsspec/implementations/__pycache__/dirfs.cpython-310.pyc,,
|
| 55 |
+
fsspec/implementations/__pycache__/ftp.cpython-310.pyc,,
|
| 56 |
+
fsspec/implementations/__pycache__/gist.cpython-310.pyc,,
|
| 57 |
+
fsspec/implementations/__pycache__/git.cpython-310.pyc,,
|
| 58 |
+
fsspec/implementations/__pycache__/github.cpython-310.pyc,,
|
| 59 |
+
fsspec/implementations/__pycache__/http.cpython-310.pyc,,
|
| 60 |
+
fsspec/implementations/__pycache__/http_sync.cpython-310.pyc,,
|
| 61 |
+
fsspec/implementations/__pycache__/jupyter.cpython-310.pyc,,
|
| 62 |
+
fsspec/implementations/__pycache__/libarchive.cpython-310.pyc,,
|
| 63 |
+
fsspec/implementations/__pycache__/local.cpython-310.pyc,,
|
| 64 |
+
fsspec/implementations/__pycache__/memory.cpython-310.pyc,,
|
| 65 |
+
fsspec/implementations/__pycache__/reference.cpython-310.pyc,,
|
| 66 |
+
fsspec/implementations/__pycache__/sftp.cpython-310.pyc,,
|
| 67 |
+
fsspec/implementations/__pycache__/smb.cpython-310.pyc,,
|
| 68 |
+
fsspec/implementations/__pycache__/tar.cpython-310.pyc,,
|
| 69 |
+
fsspec/implementations/__pycache__/webhdfs.cpython-310.pyc,,
|
| 70 |
+
fsspec/implementations/__pycache__/zip.cpython-310.pyc,,
|
| 71 |
+
fsspec/implementations/arrow.py,sha256=8FhvcvOYLZNMMegCYFFCEHgEqig8AkOU7Ehb8XfcgnA,8890
|
| 72 |
+
fsspec/implementations/asyn_wrapper.py,sha256=3lfJkGs6D_AwRBdxTSYlL-RCVdaXBZ9Itys2P5o5Si0,3738
|
| 73 |
+
fsspec/implementations/cache_mapper.py,sha256=W4wlxyPxZbSp9ItJ0pYRVBMh6bw9eFypgP6kUYuuiI4,2421
|
| 74 |
+
fsspec/implementations/cache_metadata.py,sha256=ipIe4S8nlU_M9oRJkvTqr-b0tcbXVZsxH3GxaelaNOY,8502
|
| 75 |
+
fsspec/implementations/cached.py,sha256=67ipbj-3o8O1zMGR11rZ_IWCi_7h-VRYpEAowFXqrvA,36175
|
| 76 |
+
fsspec/implementations/chained.py,sha256=iGivpNaHUFjB_ea0-HAPhcmm6CL8qnDf270PSj7JwuE,680
|
| 77 |
+
fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
|
| 78 |
+
fsspec/implementations/data.py,sha256=IhOGDkacYp5gkl9jhEu4msQfZPb0gS5Q_ml7Mbr6dgQ,1627
|
| 79 |
+
fsspec/implementations/dbfs.py,sha256=1cvvC6KBWOb8pBVpc01xavVbEPXO1xsgZvPD7H73M9k,16217
|
| 80 |
+
fsspec/implementations/dirfs.py,sha256=VNj6gPMfmmLPK4wxbtxt7mUqW7xkh2XDgMmEmSK_E1c,12166
|
| 81 |
+
fsspec/implementations/ftp.py,sha256=fJhaMIKq2RvzYlLwG3bewy2jq4iRqjVt1aIpwtUIRwI,13235
|
| 82 |
+
fsspec/implementations/gist.py,sha256=Y6jTDrE-wuTwvpPyAQDuuOMBGxlajafKWoB1_yX6jdY,8528
|
| 83 |
+
fsspec/implementations/git.py,sha256=qBDWMz5LNllPqVjr5jf_1FuNha4P5lyQI3IlhYg-wUE,3731
|
| 84 |
+
fsspec/implementations/github.py,sha256=aCsZL8UvXZgdkcB1RUs3DdLeNrjLKcFsFYeQFDWbBFo,11653
|
| 85 |
+
fsspec/implementations/http.py,sha256=-AV5qeNpBWqnsmgnIO9Ily9B6--SR4sQJ7G4cBHarGE,30675
|
| 86 |
+
fsspec/implementations/http_sync.py,sha256=UmBqd938ebwVjYgVtzg-ysG3ZoGhIJw0wFtQAfxV3Aw,30332
|
| 87 |
+
fsspec/implementations/jupyter.py,sha256=q1PlQ66AAswGFyr8MFKWyobaV2YekMWRtqENBDQtD28,4002
|
| 88 |
+
fsspec/implementations/libarchive.py,sha256=SpIA1F-zf7kb2-VYUVuhMrXTBOhBxUXKgEW1RaAdDoA,7098
|
| 89 |
+
fsspec/implementations/local.py,sha256=ERDUdXdRI8AvRX06icXaDKwO-hcQgivc7EorqnayFFM,17028
|
| 90 |
+
fsspec/implementations/memory.py,sha256=TDdLtSPWXxZKrrVGwmc3uS3oK_2mlcVTk2BiqR8IeII,10507
|
| 91 |
+
fsspec/implementations/reference.py,sha256=xSUpB8o_QFAZiVJE2dt78QZMCUMLo5TaJ27e5DwDAfg,48814
|
| 92 |
+
fsspec/implementations/sftp.py,sha256=L9pZOa6eLUWfJNtxkxeG2YI96SQwrM5Hj6ocyUZXUbg,5923
|
| 93 |
+
fsspec/implementations/smb.py,sha256=5fhu8h06nOLBPh2c48aT7WBRqh9cEcbIwtyu06wTjec,15236
|
| 94 |
+
fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
|
| 95 |
+
fsspec/implementations/webhdfs.py,sha256=osF2m0nhDil6sbMzYW_4DZzhxF4ygtb59XDiybd9Fyg,17589
|
| 96 |
+
fsspec/implementations/zip.py,sha256=6f3z0s12tDbz1RMx7iDc3JDx730IAaKDdx7tf_XYDp0,6151
|
| 97 |
+
fsspec/json.py,sha256=4EBZ-xOmRiyxmIqPIwxmDImosRQ7io7qBM2xjJPsEE4,3768
|
| 98 |
+
fsspec/mapping.py,sha256=m2ndB_gtRBXYmNJg0Ie1-BVR75TFleHmIQBzC-yWhjU,8343
|
| 99 |
+
fsspec/parquet.py,sha256=xGW3xfd9js7hrre7qN85XpSM0A1FObqkTcAv_H2xSwY,20505
|
| 100 |
+
fsspec/registry.py,sha256=o7EGl8TEaLkcwN53X_103arzuzJeeOoVaNUWnPiXgf0,12148
|
| 101 |
+
fsspec/spec.py,sha256=Ym-Ust6LRjHgbhrmvNqwOBZxoVnaw3g3xHXMZGHx_xg,77692
|
| 102 |
+
fsspec/tests/abstract/__init__.py,sha256=4xUJrv7gDgc85xAOz1p-V_K1hrsdMWTSa0rviALlJk8,10181
|
| 103 |
+
fsspec/tests/abstract/__pycache__/__init__.cpython-310.pyc,,
|
| 104 |
+
fsspec/tests/abstract/__pycache__/common.cpython-310.pyc,,
|
| 105 |
+
fsspec/tests/abstract/__pycache__/copy.cpython-310.pyc,,
|
| 106 |
+
fsspec/tests/abstract/__pycache__/get.cpython-310.pyc,,
|
| 107 |
+
fsspec/tests/abstract/__pycache__/mv.cpython-310.pyc,,
|
| 108 |
+
fsspec/tests/abstract/__pycache__/open.cpython-310.pyc,,
|
| 109 |
+
fsspec/tests/abstract/__pycache__/pipe.cpython-310.pyc,,
|
| 110 |
+
fsspec/tests/abstract/__pycache__/put.cpython-310.pyc,,
|
| 111 |
+
fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
|
| 112 |
+
fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
|
| 113 |
+
fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
|
| 114 |
+
fsspec/tests/abstract/mv.py,sha256=k8eUEBIrRrGMsBY5OOaDXdGnQUKGwDIfQyduB6YD3Ns,1982
|
| 115 |
+
fsspec/tests/abstract/open.py,sha256=Fi2PBPYLbRqysF8cFm0rwnB41kMdQVYjq8cGyDXp3BU,329
|
| 116 |
+
fsspec/tests/abstract/pipe.py,sha256=LFzIrLCB5GLXf9rzFKJmE8AdG7LQ_h4bJo70r8FLPqM,402
|
| 117 |
+
fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
|
| 118 |
+
fsspec/transaction.py,sha256=xliRG6U2Zf3khG4xcw9WiB-yAoqJSHEGK_VjHOdtgo0,2398
|
| 119 |
+
fsspec/utils.py,sha256=E24ji0XLWC6n3bw2sHA28OYxrGU9Wy_al2XydsRgrRk,23623
|
venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: hatchling 1.28.0
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|
venv/lib/python3.10/site-packages/fsspec-2026.2.0.dist-info/licenses/LICENSE
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
BSD 3-Clause License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2018, Martin Durant
|
| 4 |
+
All rights reserved.
|
| 5 |
+
|
| 6 |
+
Redistribution and use in source and binary forms, with or without
|
| 7 |
+
modification, are permitted provided that the following conditions are met:
|
| 8 |
+
|
| 9 |
+
* Redistributions of source code must retain the above copyright notice, this
|
| 10 |
+
list of conditions and the following disclaimer.
|
| 11 |
+
|
| 12 |
+
* Redistributions in binary form must reproduce the above copyright notice,
|
| 13 |
+
this list of conditions and the following disclaimer in the documentation
|
| 14 |
+
and/or other materials provided with the distribution.
|
| 15 |
+
|
| 16 |
+
* Neither the name of the copyright holder nor the names of its
|
| 17 |
+
contributors may be used to endorse or promote products derived from
|
| 18 |
+
this software without specific prior written permission.
|
| 19 |
+
|
| 20 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
| 21 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| 22 |
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 23 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
| 24 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
| 25 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 26 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
| 27 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
| 28 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 29 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
venv/lib/python3.10/site-packages/fsspec/__init__.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from . import caching
|
| 2 |
+
from ._version import __version__ # noqa: F401
|
| 3 |
+
from .callbacks import Callback
|
| 4 |
+
from .compression import available_compressions
|
| 5 |
+
from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
|
| 6 |
+
from .exceptions import FSTimeoutError
|
| 7 |
+
from .mapping import FSMap, get_mapper
|
| 8 |
+
from .registry import (
|
| 9 |
+
available_protocols,
|
| 10 |
+
filesystem,
|
| 11 |
+
get_filesystem_class,
|
| 12 |
+
register_implementation,
|
| 13 |
+
registry,
|
| 14 |
+
)
|
| 15 |
+
from .spec import AbstractFileSystem
|
| 16 |
+
|
| 17 |
+
# Public, re-exported API of the top-level ``fsspec`` namespace.
__all__ = [
    "AbstractFileSystem",
    "FSTimeoutError",
    "FSMap",
    "filesystem",
    "register_implementation",
    "get_filesystem_class",
    "get_fs_token_paths",
    "get_mapper",
    "open",
    "open_files",
    "open_local",
    "registry",
    "caching",
    "Callback",
    "available_protocols",
    "available_compressions",
    "url_to_fs",
]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def process_entries():
    """Register filesystem implementations advertised via entry points.

    Scans the ``fsspec.specs`` entry-point group and lazily registers each
    advertised implementation, skipping duplicate names.  Does nothing when
    ``importlib.metadata`` is unavailable or has an incompatible (very old)
    ``entry_points`` signature.
    """
    try:
        from importlib.metadata import entry_points
    except ImportError:
        return
    if entry_points is None:
        return
    try:
        eps = entry_points()
    except TypeError:
        # importlib-metadata < 0.8 takes arguments differently; give up.
        return
    if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
        specs = eps.select(group="fsspec.specs")
    else:
        specs = eps.get("fsspec.specs", [])
    seen = set()
    for spec in specs:
        name = spec.name
        if name in seen:
            continue
        seen.add(name)
        register_implementation(
            name,
            spec.value.replace(":", "."),
            errtxt=f"Unable to load filesystem from {spec}",
            # We take our implementations as the ones to overload with if
            # for some reason we encounter some, may be the same, already
            # registered
            clobber=True,
        )


process_entries()
|
venv/lib/python3.10/site-packages/fsspec/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# A plain False constant (not typing.TYPE_CHECKING) so the block below is
# visible to static type checkers without importing ``typing`` at runtime.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    # Runtime placeholders; the aliases only carry meaning for type checkers.
    VERSION_TUPLE = object
    COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '2026.2.0'
__version_tuple__ = version_tuple = (2026, 2, 0)

__commit_id__ = commit_id = None
|
venv/lib/python3.10/site-packages/fsspec/archive.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import operator
|
| 2 |
+
|
| 3 |
+
from fsspec import AbstractFileSystem
|
| 4 |
+
from fsspec.utils import tokenize
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class AbstractArchiveFileSystem(AbstractFileSystem):
    """
    A generic superclass for implementing Archive-based filesystems.

    Currently, it is shared amongst
    :class:`~fsspec.implementations.zip.ZipFileSystem`,
    :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
    :class:`~fsspec.implementations.tar.TarFileSystem`.

    Subclasses are expected to populate ``self.dir_cache`` (mapping of
    archive-member path -> info dict) via ``_get_dirs()`` and to provide
    ``self.fo`` — assumptions based on usage below; TODO confirm against
    the concrete implementations.
    """

    def __str__(self):
        # Identify by concrete subclass name and object id for debugging.
        return f"<Archive-like object {type(self).__name__} at {id(self)}>"

    __repr__ = __str__

    def ukey(self, path):
        # Unique key combines the member path, the underlying file object
        # reference (self.fo) and the protocol, so two archives holding the
        # same member path hash differently.
        return tokenize(path, self.fo, self.protocol)

    def _all_dirnames(self, paths):
        """Returns *all* directory names for each path in paths, including intermediate
        ones.

        Parameters
        ----------
        paths: Iterable of path strings
        """
        # Recursion terminates because each step strips one path component
        # and the root marker is removed from the working set.
        if len(paths) == 0:
            return set()

        dirnames = {self._parent(path) for path in paths} - {self.root_marker}
        return dirnames | self._all_dirnames(dirnames)

    def info(self, path, **kwargs):
        """Return the info dict for ``path``, raising FileNotFoundError if absent.

        The archive root ("" or "/") is answered synthetically once the
        directory cache is populated; other paths are looked up both with
        and without a trailing slash, since archive listings differ in how
        they record directory entries.
        """
        self._get_dirs()
        path = self._strip_protocol(path)
        if path in {"", "/"} and self.dir_cache:
            return {"name": "", "type": "directory", "size": 0}
        if path in self.dir_cache:
            return self.dir_cache[path]
        elif path + "/" in self.dir_cache:
            return self.dir_cache[path + "/"]
        else:
            raise FileNotFoundError(path)

    def ls(self, path, detail=True, **kwargs):
        """List direct children of ``path`` inside the archive.

        Walks the whole directory cache: entries whose parent equals ``path``
        are returned as-is; deeper descendants whose leading components match
        ``path`` contribute a synthetic top-level directory entry, so implied
        (unlisted) directories still appear in the listing.
        """
        self._get_dirs()
        paths = {}
        for p, f in self.dir_cache.items():
            # Normalise away the trailing slash some archives use for dirs.
            p = p.rstrip("/")
            if "/" in p:
                root = p.rsplit("/", 1)[0]
            else:
                root = ""
            if root == path.rstrip("/"):
                # Direct child of the requested directory.
                paths[p] = f
            elif all(
                (a == b)
                for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
            ):
                # root directory entry
                # NOTE(review): the zip against ["", components...] appears to
                # match descendants when listing from the archive root; the
                # synthesised entry is always the FIRST component of p, so
                # this branch looks root-specific — confirm before changing.
                ppath = p.rstrip("/").split("/", 1)[0]
                if ppath not in paths:
                    out = {"name": ppath, "size": 0, "type": "directory"}
                    paths[ppath] = out
        if detail:
            # Info dicts sorted by name for stable output.
            out = sorted(paths.values(), key=operator.itemgetter("name"))
            return out
        else:
            # Names only.
            return sorted(paths)
|
venv/lib/python3.10/site-packages/fsspec/asyn.py
ADDED
|
@@ -0,0 +1,1103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import asyncio.events
|
| 3 |
+
import functools
|
| 4 |
+
import inspect
|
| 5 |
+
import io
|
| 6 |
+
import numbers
|
| 7 |
+
import os
|
| 8 |
+
import re
|
| 9 |
+
import threading
|
| 10 |
+
from collections.abc import Iterable
|
| 11 |
+
from glob import has_magic
|
| 12 |
+
from typing import TYPE_CHECKING
|
| 13 |
+
|
| 14 |
+
from .callbacks import DEFAULT_CALLBACK
|
| 15 |
+
from .exceptions import FSTimeoutError
|
| 16 |
+
from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
|
| 17 |
+
from .spec import AbstractBufferedFile, AbstractFileSystem
|
| 18 |
+
from .utils import glob_translate, is_exception, other_paths
|
| 19 |
+
|
| 20 |
+
private = re.compile("_[^_]")  # matches names with a single leading underscore
iothread = [None]  # dedicated fsspec IO thread
loop = [None]  # global event loop for any non-async instance
_lock = None  # global lock placeholder
get_running_loop = asyncio.get_running_loop


def get_lock():
    """Return the module-level lock, allocating it on first use.

    Lazy allocation means each forked process can be given its own lock
    (see ``reset_lock``), instead of inheriting the parent's.
    """
    global _lock
    if _lock is None:
        _lock = threading.Lock()
    return _lock
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def reset_lock():
    """Drop the global lock, loop and IO-thread references.

    Intended for the start of a forked child process only: clearing the
    state lets the child lazily build a fresh lock/loop/thread of its own.
    """
    global _lock

    loop[0] = None
    iothread[0] = None
    _lock = None
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
async def _runner(event, coro, result, timeout=None):
|
| 52 |
+
timeout = timeout if timeout else None # convert 0 or 0.0 to None
|
| 53 |
+
if timeout is not None:
|
| 54 |
+
coro = asyncio.wait_for(coro, timeout=timeout)
|
| 55 |
+
try:
|
| 56 |
+
result[0] = await coro
|
| 57 |
+
except Exception as ex:
|
| 58 |
+
result[0] = ex
|
| 59 |
+
finally:
|
| 60 |
+
event.set()
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def sync(loop, func, *args, timeout=None, **kwargs):
    """
    Make loop run coroutine until it returns. Runs in other thread

    Submits ``func(*args, **kwargs)`` to *loop* (which lives on another
    thread) and blocks the calling thread until completion, a timeout, or
    interruption.

    Examples
    --------
    >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
                 timeout=timeout, **kwargs)
    """
    timeout = timeout if timeout else None  # convert 0 or 0.0 to None
    # NB: if the loop is not running *yet*, it is OK to submit work
    # and we will wait for it
    if loop is None or loop.is_closed():
        raise RuntimeError("Loop is not running")
    try:
        # calling sync() from inside the very loop we would submit to
        # would deadlock, so it is rejected explicitly
        loop0 = asyncio.events.get_running_loop()
        if loop0 is loop:
            raise NotImplementedError("Calling sync() from within a running loop")
    except NotImplementedError:
        raise
    except RuntimeError:
        # no running loop in this thread: safe to proceed
        pass
    coro = func(*args, **kwargs)
    result = [None]  # single-slot container filled by _runner
    event = threading.Event()
    asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
    while True:
        # this loops allows thread to get interrupted
        if event.wait(1):
            break
        if timeout is not None:
            # coarse 1-second countdown mirroring the event.wait(1) interval
            timeout -= 1
            if timeout < 0:
                raise FSTimeoutError

    return_result = result[0]
    if isinstance(return_result, asyncio.TimeoutError):
        # suppress asyncio.TimeoutError, raise FSTimeoutError
        raise FSTimeoutError from return_result
    elif isinstance(return_result, BaseException):
        # re-raise whatever the coroutine raised, in this thread
        raise return_result
    else:
        return return_result
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def sync_wrapper(func, obj=None):
    """Turn an async method into one callable from blocking code.

    Leave obj=None if defining within a class. Pass the instance if attaching
    as an attribute of the instance. The returned wrapper runs *func* on the
    target object's ``.loop`` via :func:`sync`.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        target = obj or args[0]
        return sync(target.loop, func, *args, **kwargs)

    return wrapper
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def get_loop():
    """Create or return the default fsspec IO loop

    The loop will be running on a separate thread.

    Uses double-checked locking: the cheap unlocked check avoids taking the
    lock on the common already-initialised path.
    """
    if loop[0] is None:
        with get_lock():
            # repeat the check just in case the loop got filled between the
            # previous two calls from another thread
            if loop[0] is None:
                loop[0] = asyncio.new_event_loop()
                th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
                # daemon thread so it never blocks interpreter shutdown
                th.daemon = True
                th.start()
                iothread[0] = th
    return loop[0]
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def reset_after_fork():
    """Reset the global loop, IO thread and lock in a newly forked child.

    The parent's event loop and IO thread do not survive a fork; clearing
    these lets the child lazily create fresh ones on first use.
    """
    global _lock
    loop[0] = None
    iothread[0] = None
    # Fix: the module's lock variable is ``_lock`` (see get_lock/reset_lock);
    # the previous ``global lock`` / ``lock = None`` created an unused name
    # and left the inherited ``_lock`` in place after fork.
    _lock = None
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# Install the fork hook where the platform supports it (CPython exposes
# os.register_at_fork only on POSIX).
if hasattr(os, "register_at_fork"):
    # should be posix; this will do nothing for spawn or forkserver subprocesses
    os.register_at_fork(after_in_child=reset_after_fork)
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# ``resource`` is POSIX-only; fall back to OSError-based handling elsewhere.
if TYPE_CHECKING:
    import resource

    ResourceError = resource.error
else:
    try:
        import resource
    except ImportError:
        # e.g. Windows: no resource module; use OSError as the error type
        resource = None
        ResourceError = OSError
    else:
        # some platforms may not expose resource.error
        ResourceError = getattr(resource, "error", OSError)
|
| 165 |
+
|
| 166 |
+
_DEFAULT_BATCH_SIZE = 128
_NOFILES_DEFAULT_BATCH_SIZE = 1280


def _get_batch_size(nofiles=False):
    """Choose how many coroutines to run per gather batch.

    Precedence: the matching key in ``fsspec.config.conf``; then the static
    default for operations not touching local files; then a cap derived from
    the process's open-file soft limit (-1, i.e. unlimited, when the soft
    limit is infinite).
    """
    from fsspec.config import conf

    key = "nofiles_gather_batch_size" if nofiles else "gather_batch_size"
    if key in conf:
        return conf[key]
    if nofiles:
        return _NOFILES_DEFAULT_BATCH_SIZE
    if resource is None:
        return _DEFAULT_BATCH_SIZE

    try:
        soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
    except (ImportError, ValueError, ResourceError):
        return _DEFAULT_BATCH_SIZE

    if soft_limit == resource.RLIM_INFINITY:
        return -1
    return soft_limit // 8
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def running_async() -> bool:
    """Being executed by an event loop?"""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return False
    else:
        return True
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
async def _run_coros_in_chunks(
    coros,
    batch_size=None,
    callback=DEFAULT_CALLBACK,
    timeout=None,
    return_exceptions=False,
    nofiles=False,
):
    """Run the given coroutines in chunks.

    Parameters
    ----------
    coros: list of coroutines to run
    batch_size: int or None
        Number of coroutines to submit/wait on simultaneously.
        If -1, then it will not be any throttling. If
        None, it will be inferred from _get_batch_size()
    callback: fsspec.callbacks.Callback instance
        Gets a relative_update when each coroutine completes
    timeout: number or None
        If given, each coroutine times out after this time. Note that, since
        there are multiple batches, the total run time of this function will in
        general be longer
    return_exceptions: bool
        Same meaning as in asyncio.gather
    nofiles: bool
        If inferring the batch_size, does this operation involve local files?
        If yes, you normally expect smaller batches.
    """

    if batch_size is None:
        batch_size = _get_batch_size(nofiles=nofiles)

    if batch_size == -1:
        # no throttling: everything in one batch
        batch_size = len(coros)

    assert batch_size > 0

    async def _run_coro(coro, i):
        # wraps each coroutine so its original index travels with the result,
        # allowing results to be reassembled in submission order below
        try:
            return await asyncio.wait_for(coro, timeout=timeout), i
        except Exception as e:
            if not return_exceptions:
                raise
            return e, i
        finally:
            callback.relative_update(1)

    i = 0
    n = len(coros)
    results = [None] * n
    pending = set()

    while pending or i < n:
        # top up the in-flight set to the batch size
        while len(pending) < batch_size and i < n:
            pending.add(asyncio.ensure_future(_run_coro(coros[i], i)))
            i += 1

        if not pending:
            break

        # wait until at least one task finishes, then drain all finished ones
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        while done:
            result, k = await done.pop()
            # place the result at its original index
            results[k] = result

    return results
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# these methods should be implemented as async by any async-able backend
async_methods = [
    "_ls",
    "_cat_file",
    "_get_file",
    "_put_file",
    "_rm_file",
    "_cp_file",
    "_pipe_file",
    "_expand_path",
    "_info",
    "_isfile",
    "_isdir",
    "_exists",
    "_walk",
    "_glob",
    "_find",
    "_du",
    "_size",
    "_mkdir",
    "_makedirs",
]
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
class AsyncFileSystem(AbstractFileSystem):
    """Async file operations, default implementations

    Passes bulk operations to asyncio.gather for concurrent operation.

    Implementations that have concurrent batch operations and/or async methods
    should inherit from this class instead of AbstractFileSystem. Docstrings are
    copied from the un-underscored method in AbstractFileSystem, if not given.
    """

    # note that methods do not have docstring here; they will be copied
    # for _* methods and inferred for overridden methods.

    async_impl = True  # marks this class as providing coroutine (_-prefixed) methods
    mirror_sync_methods = True  # presumably enables generation of blocking wrappers — see module helpers
    disable_throttling = False
|
| 313 |
+
|
| 314 |
+
def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
|
| 315 |
+
self.asynchronous = asynchronous
|
| 316 |
+
self._pid = os.getpid()
|
| 317 |
+
if not asynchronous:
|
| 318 |
+
self._loop = loop or get_loop()
|
| 319 |
+
else:
|
| 320 |
+
self._loop = None
|
| 321 |
+
self.batch_size = batch_size
|
| 322 |
+
super().__init__(*args, **kwargs)
|
| 323 |
+
|
| 324 |
+
@property
|
| 325 |
+
def loop(self):
|
| 326 |
+
if self._pid != os.getpid():
|
| 327 |
+
raise RuntimeError("This class is not fork-safe")
|
| 328 |
+
return self._loop
|
| 329 |
+
|
| 330 |
+
async def _rm_file(self, path, **kwargs):
|
| 331 |
+
if (
|
| 332 |
+
inspect.iscoroutinefunction(self._rm)
|
| 333 |
+
and type(self)._rm is not AsyncFileSystem._rm
|
| 334 |
+
):
|
| 335 |
+
return await self._rm(path, recursive=False, batch_size=1, **kwargs)
|
| 336 |
+
raise NotImplementedError
|
| 337 |
+
|
| 338 |
+
async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
|
| 339 |
+
# TODO: implement on_error
|
| 340 |
+
batch_size = batch_size or self.batch_size
|
| 341 |
+
path = await self._expand_path(path, recursive=recursive)
|
| 342 |
+
return await _run_coros_in_chunks(
|
| 343 |
+
[self._rm_file(p, **kwargs) for p in reversed(path)],
|
| 344 |
+
batch_size=batch_size,
|
| 345 |
+
nofiles=True,
|
| 346 |
+
)
|
| 347 |
+
|
| 348 |
+
    async def _cp_file(self, path1, path2, **kwargs):
        # Copy one file within the backend; concrete filesystems must override.
        raise NotImplementedError
|
| 350 |
+
|
| 351 |
+
    async def _mv_file(self, path1, path2):
        # Move implemented as copy-then-delete; not atomic.
        await self._cp_file(path1, path2)
        await self._rm_file(path1)
|
| 354 |
+
|
| 355 |
+
    async def _copy(
        self,
        path1,
        path2,
        recursive=False,
        on_error=None,
        maxdepth=None,
        batch_size=None,
        **kwargs,
    ):
        """Copy path1 to path2 within this filesystem, concurrently.

        ``on_error`` defaults to "ignore" for recursive copies (skip missing
        files) and "raise" otherwise.
        """
        if on_error is None and recursive:
            on_error = "ignore"
        elif on_error is None:
            on_error = "raise"

        if isinstance(path1, list) and isinstance(path2, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            paths1 = path1
            paths2 = path2
        else:
            source_is_str = isinstance(path1, str)
            paths1 = await self._expand_path(
                path1, maxdepth=maxdepth, recursive=recursive
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                paths1 = [
                    p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
                ]
                if not paths1:
                    return

            source_is_file = len(paths1) == 1
            dest_is_dir = isinstance(path2, str) and (
                trailing_sep(path2) or await self._isdir(path2)
            )

            # whether destination paths should be treated as already rooted
            # at an existing directory (affects how names are joined)
            exists = source_is_str and (
                (has_magic(path1) and source_is_file)
                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
            )
            paths2 = other_paths(
                paths1,
                path2,
                exists=exists,
                flatten=not source_is_str,
            )

        batch_size = batch_size or self.batch_size
        coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
        result = await _run_coros_in_chunks(
            coros, batch_size=batch_size, return_exceptions=True, nofiles=True
        )

        for ex in filter(is_exception, result):
            if on_error == "ignore" and isinstance(ex, FileNotFoundError):
                continue
            raise ex
|
| 414 |
+
|
| 415 |
+
    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
        # Write the given bytes to a single remote file; backends must override.
        raise NotImplementedError
|
| 417 |
+
|
| 418 |
+
async def _pipe(self, path, value=None, batch_size=None, **kwargs):
|
| 419 |
+
if isinstance(path, str):
|
| 420 |
+
path = {path: value}
|
| 421 |
+
batch_size = batch_size or self.batch_size
|
| 422 |
+
return await _run_coros_in_chunks(
|
| 423 |
+
[self._pipe_file(k, v, **kwargs) for k, v in path.items()],
|
| 424 |
+
batch_size=batch_size,
|
| 425 |
+
nofiles=True,
|
| 426 |
+
)
|
| 427 |
+
|
| 428 |
+
    async def _process_limits(self, url, start, end):
        """Helper for "Range"-based _cat_file

        Builds an HTTP-style ``bytes=start-end`` header value from python
        slice-like limits; negative values are resolved against the file
        size (fetched via ``_info`` only when needed).
        """
        size = None
        suff = False
        if start is not None and start < 0:
            # if start is negative and end None, end is the "suffix length"
            if end is None:
                end = -start
                start = ""
                suff = True
            else:
                size = size or (await self._info(url))["size"]
                start = size + start
        elif start is None:
            start = 0
        if not suff:
            if end is not None and end < 0:
                if start is not None:
                    size = size or (await self._info(url))["size"]
                end = size + end
            elif end is None:
                # open-ended range: "bytes=start-"
                end = ""
            if isinstance(end, numbers.Integral):
                end -= 1  # bytes range is inclusive
        return f"bytes={start}-{end}"
|
| 453 |
+
|
| 454 |
+
    async def _cat_file(self, path, start=None, end=None, **kwargs):
        # Fetch (a byte range of) one file's contents; backends must override.
        raise NotImplementedError
|
| 456 |
+
|
| 457 |
+
    async def _cat(
        self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
    ):
        """Fetch contents of one or many paths concurrently.

        Returns bytes for a single literal path, else a {path: bytes} dict;
        ``on_error`` may be "raise", "omit" (drop failures) or anything else
        (keep the exception object as the value).
        """
        paths = await self._expand_path(path, recursive=recursive)
        coros = [self._cat_file(path, **kwargs) for path in paths]
        batch_size = batch_size or self.batch_size
        out = await _run_coros_in_chunks(
            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
        )
        if on_error == "raise":
            # surface the first failure, if any
            ex = next(filter(is_exception, out), False)
            if ex:
                raise ex
        if (
            len(paths) > 1
            or isinstance(path, list)
            or paths[0] != self._strip_protocol(path)
        ):
            # input named several files (list, glob, or directory): dict result
            return {
                k: v
                for k, v in zip(paths, out)
                if on_error != "omit" or not is_exception(v)
            }
        else:
            return out[0]
|
| 482 |
+
|
| 483 |
+
async def _cat_ranges(
|
| 484 |
+
self,
|
| 485 |
+
paths,
|
| 486 |
+
starts,
|
| 487 |
+
ends,
|
| 488 |
+
max_gap=None,
|
| 489 |
+
batch_size=None,
|
| 490 |
+
on_error="return",
|
| 491 |
+
**kwargs,
|
| 492 |
+
):
|
| 493 |
+
"""Get the contents of byte ranges from one or more files
|
| 494 |
+
|
| 495 |
+
Parameters
|
| 496 |
+
----------
|
| 497 |
+
paths: list
|
| 498 |
+
A list of of filepaths on this filesystems
|
| 499 |
+
starts, ends: int or list
|
| 500 |
+
Bytes limits of the read. If using a single int, the same value will be
|
| 501 |
+
used to read all the specified files.
|
| 502 |
+
"""
|
| 503 |
+
# TODO: on_error
|
| 504 |
+
if max_gap is not None:
|
| 505 |
+
# use utils.merge_offset_ranges
|
| 506 |
+
raise NotImplementedError
|
| 507 |
+
if not isinstance(paths, list):
|
| 508 |
+
raise TypeError
|
| 509 |
+
if not isinstance(starts, Iterable):
|
| 510 |
+
starts = [starts] * len(paths)
|
| 511 |
+
if not isinstance(ends, Iterable):
|
| 512 |
+
ends = [ends] * len(paths)
|
| 513 |
+
if len(starts) != len(paths) or len(ends) != len(paths):
|
| 514 |
+
raise ValueError
|
| 515 |
+
coros = [
|
| 516 |
+
self._cat_file(p, start=s, end=e, **kwargs)
|
| 517 |
+
for p, s, e in zip(paths, starts, ends)
|
| 518 |
+
]
|
| 519 |
+
batch_size = batch_size or self.batch_size
|
| 520 |
+
return await _run_coros_in_chunks(
|
| 521 |
+
coros, batch_size=batch_size, nofiles=True, return_exceptions=True
|
| 522 |
+
)
|
| 523 |
+
|
| 524 |
+
    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
        # Upload one local file to the backend; backends must override.
        raise NotImplementedError
|
| 526 |
+
|
| 527 |
+
    async def _put(
        self,
        lpath,
        rpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        batch_size=None,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) from local.

        Copies a specific file or tree of files (if recursive=True). If rpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within.

        The put_file method will be called concurrently on a batch of files. The
        batch_size option can configure the amount of futures that can be executed
        at the same time. If it is -1, then all the files will be uploaded concurrently.
        The default can be set for this instance by passing "batch_size" in the
        constructor, or for all instances by setting the "gather_batch_size" key
        in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            source_is_str = isinstance(lpath, str)
            if source_is_str:
                lpath = make_path_posix(lpath)
            fs = LocalFileSystem()
            lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
                if not lpaths:
                    return

            source_is_file = len(lpaths) == 1
            dest_is_dir = isinstance(rpath, str) and (
                trailing_sep(rpath) or await self._isdir(rpath)
            )

            rpath = self._strip_protocol(rpath)
            # whether remote names should be joined under an existing directory
            exists = source_is_str and (
                (has_magic(lpath) and source_is_file)
                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
            )
            rpaths = other_paths(
                lpaths,
                rpath,
                exists=exists,
                flatten=not source_is_str,
            )

        # separate directories (created up-front) from real file transfers
        is_dir = {l: os.path.isdir(l) for l in lpaths}
        rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
        file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]]

        await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
        batch_size = batch_size or self.batch_size

        coros = []
        callback.set_size(len(file_pairs))
        for lfile, rfile in file_pairs:
            # branch_coro gives each transfer its own child callback
            put_file = callback.branch_coro(self._put_file)
            coros.append(put_file(lfile, rfile, **kwargs))

        return await _run_coros_in_chunks(
            coros, batch_size=batch_size, callback=callback
        )
|
| 600 |
+
|
| 601 |
+
    async def _get_file(self, rpath, lpath, **kwargs):
        # Download one remote file to a local path; backends must override.
        raise NotImplementedError
|
| 603 |
+
|
| 604 |
+
async def _get(
|
| 605 |
+
self,
|
| 606 |
+
rpath,
|
| 607 |
+
lpath,
|
| 608 |
+
recursive=False,
|
| 609 |
+
callback=DEFAULT_CALLBACK,
|
| 610 |
+
maxdepth=None,
|
| 611 |
+
**kwargs,
|
| 612 |
+
):
|
| 613 |
+
"""Copy file(s) to local.
|
| 614 |
+
|
| 615 |
+
Copies a specific file or tree of files (if recursive=True). If lpath
|
| 616 |
+
ends with a "/", it will be assumed to be a directory, and target files
|
| 617 |
+
will go within. Can submit a list of paths, which may be glob-patterns
|
| 618 |
+
and will be expanded.
|
| 619 |
+
|
| 620 |
+
The get_file method will be called concurrently on a batch of files. The
|
| 621 |
+
batch_size option can configure the amount of futures that can be executed
|
| 622 |
+
at the same time. If it is -1, then all the files will be uploaded concurrently.
|
| 623 |
+
The default can be set for this instance by passing "batch_size" in the
|
| 624 |
+
constructor, or for all instances by setting the "gather_batch_size" key
|
| 625 |
+
in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
|
| 626 |
+
"""
|
| 627 |
+
if isinstance(lpath, list) and isinstance(rpath, list):
|
| 628 |
+
# No need to expand paths when both source and destination
|
| 629 |
+
# are provided as lists
|
| 630 |
+
rpaths = rpath
|
| 631 |
+
lpaths = lpath
|
| 632 |
+
else:
|
| 633 |
+
source_is_str = isinstance(rpath, str)
|
| 634 |
+
# First check for rpath trailing slash as _strip_protocol removes it.
|
| 635 |
+
source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
|
| 636 |
+
rpath = self._strip_protocol(rpath)
|
| 637 |
+
rpaths = await self._expand_path(
|
| 638 |
+
rpath, recursive=recursive, maxdepth=maxdepth
|
| 639 |
+
)
|
| 640 |
+
if source_is_str and (not recursive or maxdepth is not None):
|
| 641 |
+
# Non-recursive glob does not copy directories
|
| 642 |
+
rpaths = [
|
| 643 |
+
p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
|
| 644 |
+
]
|
| 645 |
+
if not rpaths:
|
| 646 |
+
return
|
| 647 |
+
|
| 648 |
+
lpath = make_path_posix(lpath)
|
| 649 |
+
source_is_file = len(rpaths) == 1
|
| 650 |
+
dest_is_dir = isinstance(lpath, str) and (
|
| 651 |
+
trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
|
| 652 |
+
)
|
| 653 |
+
|
| 654 |
+
exists = source_is_str and (
|
| 655 |
+
(has_magic(rpath) and source_is_file)
|
| 656 |
+
or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
|
| 657 |
+
)
|
| 658 |
+
lpaths = other_paths(
|
| 659 |
+
rpaths,
|
| 660 |
+
lpath,
|
| 661 |
+
exists=exists,
|
| 662 |
+
flatten=not source_is_str,
|
| 663 |
+
)
|
| 664 |
+
|
| 665 |
+
[os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
|
| 666 |
+
batch_size = kwargs.pop("batch_size", self.batch_size)
|
| 667 |
+
|
| 668 |
+
coros = []
|
| 669 |
+
callback.set_size(len(lpaths))
|
| 670 |
+
for lpath, rpath in zip(lpaths, rpaths):
|
| 671 |
+
get_file = callback.branch_coro(self._get_file)
|
| 672 |
+
coros.append(get_file(rpath, lpath, **kwargs))
|
| 673 |
+
return await _run_coros_in_chunks(
|
| 674 |
+
coros, batch_size=batch_size, callback=callback
|
| 675 |
+
)
|
| 676 |
+
|
| 677 |
+
async def _isfile(self, path):
|
| 678 |
+
try:
|
| 679 |
+
return (await self._info(path))["type"] == "file"
|
| 680 |
+
except: # noqa: E722
|
| 681 |
+
return False
|
| 682 |
+
|
| 683 |
+
async def _isdir(self, path):
|
| 684 |
+
try:
|
| 685 |
+
return (await self._info(path))["type"] == "directory"
|
| 686 |
+
except OSError:
|
| 687 |
+
return False
|
| 688 |
+
|
| 689 |
+
async def _size(self, path):
|
| 690 |
+
return (await self._info(path)).get("size", None)
|
| 691 |
+
|
| 692 |
+
async def _sizes(self, paths, batch_size=None):
|
| 693 |
+
batch_size = batch_size or self.batch_size
|
| 694 |
+
return await _run_coros_in_chunks(
|
| 695 |
+
[self._size(p) for p in paths], batch_size=batch_size
|
| 696 |
+
)
|
| 697 |
+
|
| 698 |
+
async def _exists(self, path, **kwargs):
|
| 699 |
+
try:
|
| 700 |
+
await self._info(path, **kwargs)
|
| 701 |
+
return True
|
| 702 |
+
except FileNotFoundError:
|
| 703 |
+
return False
|
| 704 |
+
|
| 705 |
+
    async def _info(self, path, **kwargs):
        # Return the info dict for one path; backends must override.
        raise NotImplementedError
|
| 707 |
+
|
| 708 |
+
    async def _ls(self, path, detail=True, **kwargs):
        # List one directory level; backends must override.
        raise NotImplementedError
|
| 710 |
+
|
| 711 |
+
    async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
        """Async generator yielding (path, dirs, files) per directory level.

        ``detail`` (popped from kwargs) selects info-dicts vs plain name
        lists; ``on_error`` may be "omit", "raise", or a callable receiving
        the listing exception.
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}  # basename -> full path, for recursion below
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = await self._ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            elif callable(on_error):
                on_error(e)
            # still yield an empty level so iteration terminates cleanly
            if detail:
                yield path, {}, {}
            else:
                yield path, [], []
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as give path
                files[""] = info
            else:
                files[name] = info

        if detail:
            yield path, dirs, files
        else:
            yield path, list(dirs), list(files)

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                # depth budget exhausted: do not recurse further
                return

        for d in dirs:
            async for _ in self._walk(
                full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs
            ):
                yield _
|
| 764 |
+
|
| 765 |
+
    async def _glob(self, path, maxdepth=None, **kwargs):
        """Expand a glob pattern against this filesystem.

        Finds the literal prefix before the first magic character, lists
        below it with ``_find``, then filters with a regex built by
        ``glob_translate``. Returns names, or {name: info} with detail=True.
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        # position of the first glob magic character (or end of string)
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)
        withdirs = kwargs.pop("withdirs", True)

        if not has_magic(path):
            # no pattern at all: behave like exists()/info()
            if await self._exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: await self._info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            # literal directory prefix before the first magic char
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                # ``**`` spans arbitrarily many levels; cap with maxdepth
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = await self._find(
            root, maxdepth=depth, withdirs=withdirs, detail=True, **kwargs
        )

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                p + "/"
                if append_slash_to_dirname and info["type"] == "directory"
                else p
            )
        }

        if detail:
            return out
        else:
            return list(out)
|
| 834 |
+
|
| 835 |
+
async def _du(self, path, total=True, maxdepth=None, **kwargs):
|
| 836 |
+
sizes = {}
|
| 837 |
+
# async for?
|
| 838 |
+
for f in await self._find(path, maxdepth=maxdepth, **kwargs):
|
| 839 |
+
info = await self._info(f)
|
| 840 |
+
sizes[info["name"]] = info["size"]
|
| 841 |
+
if total:
|
| 842 |
+
return sum(sizes.values())
|
| 843 |
+
else:
|
| 844 |
+
return sizes
|
| 845 |
+
|
| 846 |
+
async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
|
| 847 |
+
path = self._strip_protocol(path)
|
| 848 |
+
out = {}
|
| 849 |
+
detail = kwargs.pop("detail", False)
|
| 850 |
+
|
| 851 |
+
# Add the root directory if withdirs is requested
|
| 852 |
+
# This is needed for posix glob compliance
|
| 853 |
+
if withdirs and path != "" and await self._isdir(path):
|
| 854 |
+
out[path] = await self._info(path)
|
| 855 |
+
|
| 856 |
+
# async for?
|
| 857 |
+
async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
|
| 858 |
+
if withdirs:
|
| 859 |
+
files.update(dirs)
|
| 860 |
+
out.update({info["name"]: info for name, info in files.items()})
|
| 861 |
+
if not out and (await self._isfile(path)):
|
| 862 |
+
# walk works on directories, but find should also return [path]
|
| 863 |
+
# when path happens to be a file
|
| 864 |
+
out[path] = {}
|
| 865 |
+
names = sorted(out)
|
| 866 |
+
if not detail:
|
| 867 |
+
return names
|
| 868 |
+
else:
|
| 869 |
+
return {name: out[name] for name in names}
|
| 870 |
+
|
| 871 |
+
async def _expand_path(self, path, recursive=False, maxdepth=None):
    """Expand path(s) — which may contain glob characters — into a sorted
    list of concrete paths.

    Parameters
    ----------
    path: str | list[str]
        One path or a list of paths; each may contain glob magic.
    recursive: bool
        If True, also descend into matched directories via ``_find``.
    maxdepth: int | None
        Bound on recursion depth; must be >= 1 when given.

    Raises
    ------
    ValueError
        If ``maxdepth`` is given but less than 1.
    FileNotFoundError
        If nothing at all matched.
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    if isinstance(path, str):
        # normalize the single-string case onto the list code path below
        out = await self._expand_path([path], recursive, maxdepth)
    else:
        out = set()
        path = [self._strip_protocol(p) for p in path]
        for p in path:  # can gather here
            if has_magic(p):
                bit = set(await self._glob(p, maxdepth=maxdepth))
                out |= bit
                if recursive:
                    # glob call above expanded one depth so if maxdepth is defined
                    # then decrement it in expand_path call below. If it is zero
                    # after decrementing then avoid expand_path call.
                    if maxdepth is not None and maxdepth <= 1:
                        continue
                    out |= set(
                        await self._expand_path(
                            list(bit),
                            recursive=recursive,
                            maxdepth=maxdepth - 1 if maxdepth is not None else None,
                        )
                    )
                continue
            elif recursive:
                # no glob magic: treat p as a directory root and find everything
                rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True))
                out |= rec
            if p not in out and (recursive is False or (await self._exists(p))):
                # should only check once, for the root
                out.add(p)
    if not out:
        raise FileNotFoundError(path)
    return sorted(out)
|
| 907 |
+
|
| 908 |
+
async def _mkdir(self, path, create_parents=True, **kwargs):
    """Create a directory entry at *path*.

    Default is a deliberate no-op: many remote stores have no real
    directory objects, so implementations only override when needed.
    """
    pass  # not necessary to implement, may not have directories
|
| 910 |
+
|
| 911 |
+
async def _makedirs(self, path, exist_ok=False):
    """Recursively create directories up to *path*.

    Default is a deliberate no-op: many remote stores have no real
    directory objects, so implementations only override when needed.
    """
    pass  # not necessary to implement, may not have directories
|
| 913 |
+
|
| 914 |
+
async def open_async(self, path, mode="rb", **kwargs):
|
| 915 |
+
if "b" not in mode or kwargs.get("compression"):
|
| 916 |
+
raise ValueError
|
| 917 |
+
raise NotImplementedError
|
| 918 |
+
|
| 919 |
+
|
| 920 |
+
def mirror_sync_methods(obj):
    """Populate sync and async methods for obj

    For each method will create a sync version if the name refers to an async method
    (coroutine) and there is no override in the child class; will create an async
    method for the corresponding sync method if there is no implementation.

    Uses the methods specified in
    - async_methods: the set that an implementation is expected to provide
    - default_async_methods: that can be derived from their sync version in
      AbstractFileSystem
    - AsyncFileSystem: async-specific default coroutines
    """
    # imported here to avoid a circular import at module load time
    from fsspec import AbstractFileSystem

    for method in async_methods + dir(AsyncFileSystem):
        if not method.startswith("_"):
            # only underscore-prefixed names (e.g. "_cat") mirror a public name
            continue
        smethod = method[1:]
        if private.match(method):
            # NOTE(review): "private" is a module-level regex — presumably it
            # admits single-underscore names and excludes dunders; confirm
            isco = inspect.iscoroutinefunction(getattr(obj, method, None))
            # underlying function of the sync candidate, if it is a bound method
            unsync = getattr(getattr(obj, smethod, False), "__func__", None)
            # "" is a sentinel default so a missing attribute can never
            # compare identical to unsync
            is_default = unsync is getattr(AbstractFileSystem, smethod, "")
            if isco and is_default:
                # async version exists and the sync one is still the
                # inherited AbstractFileSystem default: wrap it
                mth = sync_wrapper(getattr(obj, method), obj=obj)
                setattr(obj, smethod, mth)
                if not mth.__doc__:
                    # inherit documentation from the sync counterpart
                    mth.__doc__ = getattr(
                        getattr(AbstractFileSystem, smethod, None), "__doc__", ""
                    )
|
| 950 |
+
|
| 951 |
+
|
| 952 |
+
class FSSpecCoroutineCancel(Exception):
    """Thrown into running fsspec coroutines to force-cancel them.

    See ``_dump_running_tasks``.
    """

    pass
|
| 954 |
+
|
| 955 |
+
|
| 956 |
+
def _dump_running_tasks(
    printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
):
    """Describe (and by default cancel) all unfinished tasks on the fsspec loop.

    Debugging aid. Relies on CPython Task/coroutine internals (``_coro``,
    ``_callbacks``, ``cr_frame``), so it is not portable across interpreters.

    Parameters
    ----------
    printout: bool
        If True, print each task's stack first.
    cancel: bool
        If True, forcibly terminate every task by throwing *exc* into it.
    exc: type[BaseException]
        Exception class thrown into the running coroutines.
    with_task: bool
        If True, include the task object itself in each result dict.

    Returns
    -------
    list[dict]
        One dict per running task with its frame locals, file, line
        numbers and formatted stack.
    """
    import traceback

    # NOTE(review): "loop" is a module-level container for the dedicated
    # fsspec event loop — presumably a one-element list; confirm
    tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
    if printout:
        [task.print_stack() for task in tasks]
    out = [
        {
            "locals": task._coro.cr_frame.f_locals,
            "file": task._coro.cr_frame.f_code.co_filename,
            "firstline": task._coro.cr_frame.f_code.co_firstlineno,
            "linelo": task._coro.cr_frame.f_lineno,
            "stack": traceback.format_stack(task._coro.cr_frame),
            "task": task if with_task else None,
        }
        for task in tasks
    ]
    if cancel:
        for t in tasks:
            # snapshot callbacks before cancelling, then invoke them manually
            cbs = t._callbacks
            t.cancel()
            # bypass Task's own state machine to force the exception in
            asyncio.futures.Future.set_exception(t, exc)
            asyncio.futures.Future.cancel(t)
            [cb[0](t) for cb in cbs]  # cancels any dependent concurrent.futures
            try:
                t._coro.throw(exc)  # exits coro, unless explicitly handled
            except exc:
                pass
    return out
|
| 987 |
+
|
| 988 |
+
|
| 989 |
+
class AbstractAsyncStreamedFile(AbstractBufferedFile):
    """Async file object: no read buffering, and always auto-commit.

    Subclasses must provide ``_fetch_range`` and ``_upload_chunk`` (and
    optionally ``_initiate_upload``).
    """

    # TODO: readahead might still be useful here, but needs async version

    async def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.
        """
        length = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if length == 0:
            # don't even bother calling fetch
            return b""
        out = await self._fetch_range(self.loc, self.loc + length)
        self.loc += len(out)
        return out

    async def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.
        """
        if self.mode not in {"wb", "ab"}:
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        out = self.buffer.write(data)
        self.loc += out
        if self.buffer.tell() >= self.blocksize:
            await self.flush()
        return out

    async def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False):
            return
        if self.closed:
            # idempotent: closing twice is a no-op
            return
        if self.mode == "rb":
            self.cache = None
        else:
            if not self.forced:
                await self.flush(force=True)

            if self.fs is not None:
                # listings for this path and its parent are now stale
                self.fs.invalidate_cache(self.path)
                self.fs.invalidate_cache(self.fs._parent(self.path))

        self.closed = True

    async def flush(self, force=False):
        """Send buffered data to the backend.

        Parameters
        ----------
        force: bool
            If True, finalize the upload even if the buffer is smaller
            than one blocksize; may only happen once per file.
        """
        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.mode not in {"wb", "ab"}:
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload
            self.offset = 0
            try:
                await self._initiate_upload()
            except BaseException:
                # was a bare "except:"; made explicit — mark the file
                # unusable before re-raising so no further writes occur
                self.closed = True
                raise

        if await self._upload_chunk(final=force) is not False:
            # chunk accepted: advance the offset and start a fresh buffer
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def _fetch_range(self, start, end):
        """Get bytes in [start, end); concrete subclasses must override."""
        raise NotImplementedError

    async def _initiate_upload(self):
        """Hook called once before the first chunk is uploaded; default no-op."""
        pass

    async def _upload_chunk(self, final=False):
        """Send the current buffer to the backend; subclasses must override."""
        raise NotImplementedError
|
venv/lib/python3.10/site-packages/fsspec/caching.py
ADDED
|
@@ -0,0 +1,1004 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import collections
|
| 4 |
+
import functools
|
| 5 |
+
import logging
|
| 6 |
+
import math
|
| 7 |
+
import os
|
| 8 |
+
import threading
|
| 9 |
+
from collections import OrderedDict
|
| 10 |
+
from collections.abc import Callable
|
| 11 |
+
from concurrent.futures import Future, ThreadPoolExecutor
|
| 12 |
+
from itertools import groupby
|
| 13 |
+
from operator import itemgetter
|
| 14 |
+
from typing import TYPE_CHECKING, Any, ClassVar, Generic, NamedTuple, TypeVar
|
| 15 |
+
|
| 16 |
+
if TYPE_CHECKING:
|
| 17 |
+
import mmap
|
| 18 |
+
|
| 19 |
+
from typing_extensions import ParamSpec
|
| 20 |
+
|
| 21 |
+
P = ParamSpec("P")
|
| 22 |
+
else:
|
| 23 |
+
P = TypeVar("P")
|
| 24 |
+
|
| 25 |
+
T = TypeVar("T")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
logger = logging.getLogger("fsspec.caching")

# Maps (start, end) -> bytes
Fetcher = Callable[[int, int], bytes]
# Maps [(start, end), ...] -> bytes, fetching several ranges in one call.
# Fixed: the element type is a pair, so the alias is list[tuple[int, int]]
# (the previous list[int, int] is not a valid typing subscript).
MultiFetcher = Callable[[list[tuple[int, int]]], bytes]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class BaseCache:
    """Pass-through cache: stores nothing and calls the fetcher every time.

    Serves as the base class for the real caching strategies.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    """

    name: ClassVar[str] = "none"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        self.blocksize = blocksize
        self.nblocks = 0
        self.fetcher = fetcher
        self.size = size
        # statistics: hits/misses and the bytes actually requested upstream
        self.hit_count = 0
        self.miss_count = 0
        self.total_requested_bytes = 0

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        """Fetch [start, stop) straight from the fetcher; nothing is kept."""
        start = 0 if start is None else start
        stop = self.size if stop is None else stop
        if start >= self.size or start >= stop:
            return b""
        return self.fetcher(start, stop)

    def _reset_stats(self) -> None:
        """Reset hit and miss counts for a more granular report, e.g. by file."""
        self.hit_count = 0
        self.miss_count = 0
        self.total_requested_bytes = 0

    def _log_stats(self) -> str:
        """Return a formatted string of the cache statistics."""
        if not (self.hit_count or self.miss_count):
            # a cache that does nothing; keep log lines clean
            return ""
        return (
            f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, "
            f"{self.total_requested_bytes} total requested bytes"
        )

    def __repr__(self) -> str:
        # TODO: use rich for better formatting
        return f"""
        <{self.__class__.__name__}:
            block size  :   {self.blocksize}
            block count :   {self.nblocks}
            file size   :   {self.size}
            cache hits  :   {self.hit_count}
            cache misses:   {self.miss_count}
            total requested bytes: {self.total_requested_bytes}>
        """
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class MMapCache(BaseCache):
    """memory-mapped sparse file cache

    Opens temporary file, which is filled blocks-wise when data is requested.
    Ensure there is enough disc space in the temporary location.

    This cache method might only work on posix

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: Fetcher
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    location: str
        Where to create the temporary file. If None, a temporary file is
        created using tempfile.TemporaryFile().
    blocks: set[int]
        Set of block numbers that have already been fetched. If None, an empty
        set is created.
    multi_fetcher: MultiFetcher
        Function of the form f([(start, end)]) which gets bytes from remote
        as specified. This function is used to fetch multiple blocks at once.
        If not specified, the fetcher function is used instead.
    """

    name = "mmap"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        location: str | None = None,
        blocks: set[int] | None = None,
        multi_fetcher: MultiFetcher | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.blocks = set() if blocks is None else blocks
        self.location = location
        self.multi_fetcher = multi_fetcher
        self.cache = self._makefile()

    def _makefile(self) -> mmap.mmap | bytearray:
        """Create (or reopen) the backing file and memory-map it."""
        import mmap
        import tempfile

        if self.size == 0:
            # cannot mmap a zero-length file; an empty bytearray behaves the same
            return bytearray()

        # posix version
        if self.location is None or not os.path.exists(self.location):
            if self.location is None:
                fd = tempfile.TemporaryFile()
                self.blocks = set()
            else:
                fd = open(self.location, "wb+")
            # write one byte at the final offset so the sparse file has full size
            fd.seek(self.size - 1)
            fd.write(b"1")
            fd.flush()
        else:
            # reuse an existing cache file; self.blocks records what it holds
            fd = open(self.location, "r+b")

        return mmap.mmap(fd.fileno(), self.size)

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end), fetching any missing blocks into the mmap."""
        logger.debug(f"MMap cache fetching {start}-{end}")
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        start_block = start // self.blocksize
        end_block = end // self.blocksize
        block_range = range(start_block, end_block + 1)
        # Determine which blocks need to be fetched. This sequence is sorted by construction.
        need = (i for i in block_range if i not in self.blocks)
        # Count the number of blocks already cached
        self.hit_count += sum(1 for i in block_range if i in self.blocks)

        ranges = []

        # Consolidate needed blocks.
        # Algorithm adapted from Python 2.x itertools documentation.
        # We are grouping an enumerated sequence of blocks. By comparing when the difference
        # between an ascending range (provided by enumerate) and the needed block numbers
        # we can detect when the block number skips values. The key computes this difference.
        # Whenever the difference changes, we know that we have previously cached block(s),
        # and a new group is started. In other words, this algorithm neatly groups
        # runs of consecutive block numbers so they can be fetched together.
        for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
            # Extract the blocks from the enumerated sequence
            _blocks = tuple(map(itemgetter(1), _blocks))
            # Compute start of first block
            sstart = _blocks[0] * self.blocksize
            # Compute the end of the last block. Last block may not be full size.
            send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)

            # Fetch bytes (could be multiple consecutive blocks)
            self.total_requested_bytes += send - sstart
            logger.debug(
                f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
            )
            ranges.append((sstart, send))

            # Update set of cached blocks
            self.blocks.update(_blocks)
            # Update cache statistics with number of blocks we had to cache
            self.miss_count += len(_blocks)

        if not ranges:
            return self.cache[start:end]

        if self.multi_fetcher:
            logger.debug(f"MMap get blocks {ranges}")
            for idx, r in enumerate(self.multi_fetcher(ranges)):
                sstart, send = ranges[idx]
                logger.debug(f"MMap copy block ({sstart}-{send})")
                self.cache[sstart:send] = r
        else:
            for sstart, send in ranges:
                logger.debug(f"MMap get block ({sstart}-{send})")
                self.cache[sstart:send] = self.fetcher(sstart, send)

        return self.cache[start:end]

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__.copy()
        # Remove the unpicklable entries (the mmap handle).
        del state["cache"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        # Restore instance attributes, then recreate the mmap from
        # location/blocks so the cache is usable again
        self.__dict__.update(state)
        self.cache = self._makefile()
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
class ReadAheadCache(BaseCache):
    """Cache which reads only when we get beyond a block of data

    This is a much simpler version of BytesCache, and does not attempt to
    fill holes in the cache or keep fragments alive. It is best suited to
    many small reads in a sequential order (e.g., reading lines from a file).
    """

    name = "readahead"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        super().__init__(blocksize, fetcher, size)
        # single cached window: the bytes covering [self.start, self.end)
        self.cache = b""
        self.start = 0
        self.end = 0

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end), replacing the window on a miss."""
        if start is None:
            start = 0
        if end is None or end > self.size:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        remaining = end - start
        if self.start <= start and end <= self.end:
            # full hit: the request lies entirely inside the cached window
            self.hit_count += 1
            offset = start - self.start
            return self.cache[offset : offset + remaining]
        if self.start <= start < self.end:
            # partial hit: reuse the cached tail, then fetch what is left
            self.miss_count += 1
            prefix = self.cache[start - self.start :]
            remaining -= len(prefix)
            start = self.end
        else:
            # complete miss: nothing reusable
            self.miss_count += 1
            prefix = b""
        # read ahead one extra blocksize beyond what was asked for
        end = min(self.size, end + self.blocksize)
        self.total_requested_bytes += end - start
        self.cache = self.fetcher(start, end)  # new block replaces old
        self.start = start
        self.end = self.start + len(self.cache)
        return prefix + self.cache[:remaining]
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
class FirstChunkCache(BaseCache):
    """Caches the first block of a file only

    This may be useful for file types where the metadata is stored in the header,
    but is randomly accessed.
    """

    name = "first"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        if blocksize > size:
            # this will buffer the whole thing
            blocksize = size
        super().__init__(blocksize, fetcher, size)
        # lazily-filled first block of the file
        self.cache: bytes | None = None

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end); only the first block is ever cached."""
        start = start or 0
        if start > self.size:
            logger.debug("FirstChunkCache: requested start > file size")
            return b""

        # treat a missing end as "to the end of the file"; previously
        # end=None crashed with TypeError on min(None, int)
        end = self.size if end is None else min(end, self.size)

        if start < self.blocksize:
            if self.cache is None:
                self.miss_count += 1
                if end > self.blocksize:
                    # request spans past the first block: fetch it all in one
                    # go and keep only the first block
                    self.total_requested_bytes += end
                    data = self.fetcher(0, end)
                    self.cache = data[: self.blocksize]
                    return data[start:]
                self.cache = self.fetcher(0, self.blocksize)
                self.total_requested_bytes += self.blocksize
            part = self.cache[start:end]
            if end > self.blocksize:
                # cached prefix plus a direct fetch for the remainder
                self.total_requested_bytes += end - self.blocksize
                part += self.fetcher(self.blocksize, end)
            self.hit_count += 1
            return part
        else:
            # entirely outside the cached region: plain pass-through
            self.miss_count += 1
            self.total_requested_bytes += end - start
            return self.fetcher(start, end)
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
class BlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name = "blockcache"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        # NOTE: lru_cache over a bound method keeps this instance alive for
        # the cache's lifetime; acceptable here since cache and file share it
        self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)

    def cache_info(self):
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        # Copy before deleting: previously this deleted from the live
        # __dict__, leaving the instance without its cache after pickling.
        state = self.__dict__.copy()
        del state["_fetch_block_cached"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        self.__dict__.update(state)
        # rebuild the unpicklable LRU wrapper
        self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
            self._fetch_block
        )

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        """Return bytes [start, end) assembled from cached blocks."""
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        return self._read_cache(
            start, end, start // self.blocksize, (end - 1) // self.blocksize
        )

    def _fetch_block(self, block_number: int) -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        # valid block numbers are 0..nblocks-1 (was an off-by-one ">" check,
        # which let block_number == nblocks through)
        if block_number >= self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        self.total_requested_bytes += end - start
        self.miss_count += 1
        logger.info("BlockCache fetching block %d", block_number)
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize
        if end_pos == 0:
            # the request ends exactly on a block boundary
            end_pos = self.blocksize

        self.hit_count += 1
        if start_block_number == end_block_number:
            block: bytes = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
class BytesCache(BaseCache):
    """Cache which holds data in an in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    Parameters
    ----------
    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.
    """

    name: ClassVar[str] = "bytes"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""  # single contiguous buffer of fetched bytes
        self.start: int | None = None  # file offset of first cached byte
        self.end: int | None = None  # file offset one past last cached byte
        self.trim = trim

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        # Serve [start, end) from the contiguous buffer, growing it at
        # whichever end is needed (with one blocksize of read-ahead).
        # TODO: only set start/end after fetch, in case it fails?
        # is this where retry logic might go?
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        if (
            self.start is not None
            and start >= self.start
            and self.end is not None
            and end < self.end
        ):
            # cache hit: we have all the required data
            offset = start - self.start
            self.hit_count += 1
            return self.cache[offset : offset + end - start]

        if self.blocksize:
            # read ahead by one blocksize past the requested end
            bend = min(self.size, end + self.blocksize)
        else:
            bend = end

        if bend == start or start > self.size:
            return b""

        if (self.start is None or start < self.start) and (
            self.end is None or end > self.end
        ):
            # First read, or extending both before and after
            self.total_requested_bytes += bend - start
            self.miss_count += 1
            self.cache = self.fetcher(start, bend)
            self.start = start
        else:
            assert self.start is not None
            assert self.end is not None
            self.miss_count += 1

            if start < self.start:
                if self.end is None or self.end - end > self.blocksize:
                    # request is far before the buffer: replace it entirely
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    # overlapping/adjacent: prepend only the missing prefix
                    self.total_requested_bytes += self.start - start
                    new = self.fetcher(start, self.start)
                    self.start = start
                    self.cache = new + self.cache
            elif self.end is not None and bend > self.end:
                if self.end > self.size:
                    # buffer already extends past EOF; nothing to fetch
                    pass
                elif end - self.end > self.blocksize:
                    # request is far past the buffer: replace it entirely
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    # just past the buffer: append only the missing suffix
                    self.total_requested_bytes += bend - self.end
                    new = self.fetcher(self.end, bend)
                    self.cache = self.cache + new

        self.end = self.start + len(self.cache)
        offset = start - self.start
        out = self.cache[offset : offset + end - start]
        if self.trim:
            # drop whole blocks from the front once the buffer is more than
            # one block long, keeping memory bounded for forward readers
            num = (self.end - self.start) // (self.blocksize + 1)
            if num > 1:
                self.start += self.blocksize * num
                self.cache = self.cache[self.blocksize * num :]
        return out

    def __len__(self) -> int:
        # number of bytes currently buffered
        return len(self.cache)
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
class AllBytes(BaseCache):
    """Hold the complete contents of the file in memory.

    If ``data`` is not supplied, the whole file is downloaded once at
    construction time via ``fetcher``; afterwards every read is a pure
    slice of the in-memory bytes and always counts as a hit.
    """

    name: ClassVar[str] = "all"

    def __init__(
        self,
        blocksize: int | None = None,
        fetcher: Fetcher | None = None,
        size: int | None = None,
        data: bytes | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
        if data is None:
            # eagerly pull down the entire file in one request
            self.miss_count += 1
            self.total_requested_bytes += self.size
            data = self.fetcher(0, self.size)
        self.data = data

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        """Slice the requested range out of the in-memory copy."""
        self.hit_count += 1
        return self.data[start:stop]
|
| 582 |
+
|
| 583 |
+
|
| 584 |
+
class KnownPartsOfAFile(BaseCache):
    """
    Cache holding known file parts.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    data: dict
        A dictionary mapping explicit `(start, stop)` file-offset tuples
        with known bytes.
    strict: bool, default False
        Whether to fetch reads that go beyond a known byte-range boundary.
        If `False`, any read that ends outside a known part will be zero
        padded. Note that zero padding will not be used for reads that
        begin outside a known byte-range.
    """

    name: ClassVar[str] = "parts"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        data: dict[tuple[int, int], bytes] | None = None,
        strict: bool = False,
        **_: Any,
    ):
        super().__init__(blocksize, fetcher, size)
        self.strict = strict

        # simple consolidation of contiguous blocks
        if data:
            old_offsets = sorted(data.keys())
            offsets = [old_offsets[0]]
            blocks = [data.pop(old_offsets[0])]
            for start, stop in old_offsets[1:]:
                start0, stop0 = offsets[-1]
                if start == stop0:
                    # this range begins exactly where the previous one ends:
                    # merge the two into a single contiguous entry
                    offsets[-1] = (start0, stop)
                    blocks[-1] += data.pop((start, stop))
                else:
                    offsets.append((start, stop))
                    blocks.append(data.pop((start, stop)))

            self.data = dict(zip(offsets, blocks))
        else:
            self.data = {}

    @property
    def size(self):
        # total number of known bytes (sum of the part lengths)
        return sum(_[1] - _[0] for _ in self.data)

    @size.setter
    def size(self, value):
        # size is derived from the parts; ignore assignment from BaseCache
        pass

    @property
    def nblocks(self):
        # one "block" per known part
        return len(self.data)

    @nblocks.setter
    def nblocks(self, value):
        # derived from the parts; ignore assignment
        pass

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        # Scan the known parts in offset order, stitching together the
        # requested range; gaps are either zero-padded (strict=False)
        # or raise ValueError (strict=True).
        logger.debug("Known parts request %s %s", start, stop)
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        self.total_requested_bytes += stop - start
        out = b""
        started = False  # True once the byte at `start` has been located
        loc_old = 0  # end offset of the previously visited part
        for loc0, loc1 in sorted(self.data):
            if (loc0 <= start < loc1) and (loc0 <= stop <= loc1):
                # entirely within the block
                off = start - loc0
                self.hit_count += 1
                return self.data[(loc0, loc1)][off : off + stop - start]
            if stop <= loc0:
                break
            if started and loc0 > loc_old:
                # a gap where we need data
                self.miss_count += 1
                if self.strict:
                    raise ValueError
                out += b"\x00" * (loc0 - loc_old)
            if loc0 <= start < loc1:
                # found the start
                self.hit_count += 1
                off = start - loc0
                out = self.data[(loc0, loc1)][off : off + stop - start]
                started = True
            elif start < loc0 and stop > loc1:
                # the whole block
                self.hit_count += 1
                out += self.data[(loc0, loc1)]
            elif loc0 <= stop <= loc1:
                # end block
                self.hit_count += 1
                out = out + self.data[(loc0, loc1)][: stop - loc0]
                return out
            loc_old = loc1
        self.miss_count += 1
        if started and not self.strict:
            # ran past the last known part: pad the tail with zeros
            out = out + b"\x00" * (stop - loc_old)
            return out
        raise ValueError
|
| 700 |
+
|
| 701 |
+
|
| 702 |
+
class UpdatableLRU(Generic[P, T]):
    """
    Thread-safe LRU cache whose entries can also be inserted directly.

    Used by BackgroundBlockCache to merge results computed by a
    background thread into the same cache as synchronous fetches.
    """

    class CacheInfo(NamedTuple):
        hits: int
        misses: int
        maxsize: int
        currsize: int

    def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
        self._cache: OrderedDict[Any, T] = collections.OrderedDict()
        self._func = func
        self._max_size = max_size
        self._hits = 0
        self._misses = 0
        self._lock = threading.Lock()

    def _evict_if_needed(self) -> None:
        # caller must already hold self._lock; drop the least-recent entry
        if len(self._cache) > self._max_size:
            self._cache.popitem(last=False)

    def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
        if kwargs:
            raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
        with self._lock:
            if args in self._cache:
                # refresh recency and serve the cached value
                self._cache.move_to_end(args)
                self._hits += 1
                return self._cache[args]

        # compute outside the lock so other threads are not blocked
        result = self._func(*args, **kwargs)

        with self._lock:
            self._cache[args] = result
            self._misses += 1
            self._evict_if_needed()

        return result

    def is_key_cached(self, *args: Any) -> bool:
        """Return whether ``args`` is currently present in the cache."""
        with self._lock:
            return args in self._cache

    def add_key(self, result: T, *args: Any) -> None:
        """Insert/overwrite the cached value for ``args`` directly."""
        with self._lock:
            self._cache[args] = result
            self._evict_if_needed()

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """Return a snapshot of hit/miss/size statistics."""
        with self._lock:
            return self.CacheInfo(
                hits=self._hits,
                misses=self._misses,
                maxsize=self._max_size,
                currsize=len(self._cache),
            )
|
| 760 |
+
|
| 761 |
+
|
| 762 |
+
class BackgroundBlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks with pre-loading of
    the next block in the background.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored. If the
    next block is not in cache, it is loaded in a separate thread
    in non-blocking way.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name: ClassVar[str] = "background"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)

        # single worker so at most one block is prefetched at a time
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number: int | None = None
        self._fetch_future: Future[bytes] | None = None
        # guards the two _fetch_future* attributes above
        self._fetch_future_lock = threading.Lock()

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        # NOTE(review): this mutates the live instance's __dict__ rather
        # than a copy, so pickling strips these attributes from the object
        # being pickled — consider ``self.__dict__.copy()``; confirm intent.
        state = self.__dict__
        del state["_fetch_block_cached"]
        del state["_thread_executor"]
        del state["_fetch_future_block_number"]
        del state["_fetch_future"]
        del state["_fetch_future_lock"]
        return state

    def __setstate__(self, state) -> None:
        # rebuild the unpicklable cache/executor/lock machinery
        self.__dict__.update(state)
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number = None
        self._fetch_future = None
        self._fetch_future_lock = threading.Lock()

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        # Serve [start, end) from cached blocks, joining any in-flight
        # background fetch that overlaps the range, then kick off a
        # prefetch of the following block.
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        fetch_future_block_number = None
        fetch_future = None
        with self._fetch_future_lock:
            # Background thread is running. Check whether we can or must join it.
            if self._fetch_future is not None:
                assert self._fetch_future_block_number is not None
                if self._fetch_future.done():
                    logger.info("BlockCache joined background fetch without waiting.")
                    self._fetch_block_cached.add_key(
                        self._fetch_future.result(), self._fetch_future_block_number
                    )
                    # Cleanup the fetch variables. Done with fetching the block.
                    self._fetch_future_block_number = None
                    self._fetch_future = None
                else:
                    # Must join if we need the block for the current fetch
                    must_join = bool(
                        start_block_number
                        <= self._fetch_future_block_number
                        <= end_block_number
                    )
                    if must_join:
                        # Copy to the local variables to release lock
                        # before waiting for result
                        fetch_future_block_number = self._fetch_future_block_number
                        fetch_future = self._fetch_future

                        # Cleanup the fetch variables. Have a local copy.
                        self._fetch_future_block_number = None
                        self._fetch_future = None

        # Need to wait for the future for the current read
        if fetch_future is not None:
            logger.info("BlockCache waiting for background fetch.")
            # Wait until result and put it in cache
            self._fetch_block_cached.add_key(
                fetch_future.result(), fetch_future_block_number
            )

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        # fetch next block in the background if nothing is running in the background,
        # the block is within file and it is not already cached
        end_block_plus_1 = end_block_number + 1
        with self._fetch_future_lock:
            if (
                self._fetch_future is None
                and end_block_plus_1 <= self.nblocks
                and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
            ):
                self._fetch_future_block_number = end_block_plus_1
                self._fetch_future = self._thread_executor.submit(
                    self._fetch_block, end_block_plus_1, "async"
                )

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
        self.total_requested_bytes += end - start
        self.miss_count += 1
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        # kind of pointless to count this as a hit, but it is
        self.hit_count += 1

        if start_block_number == end_block_number:
            # the whole request lies within one block
            block = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)
|
| 966 |
+
|
| 967 |
+
|
| 968 |
+
# Registry of available cache implementations, keyed by each class's
# ``name`` attribute; populated below via ``register_cache``.
caches: dict[str | None, type[BaseCache]] = {
    # one custom case
    None: BaseCache,
}
|
| 972 |
+
|
| 973 |
+
|
| 974 |
+
def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
    """Add a cache implementation to the global ``caches`` registry.

    The class is stored under its ``name`` class attribute.

    Parameters
    ----------
    cls: type[BaseCache]
        The cache implementation to register.
    clobber: bool, optional
        If set to True (default is False) - allow to overwrite existing
        entry.

    Raises
    ------
    ValueError
        If the name is already registered and ``clobber`` is False.
    """
    name = cls.name
    if name in caches and not clobber:
        raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
    caches[name] = cls
|
| 991 |
+
|
| 992 |
+
|
| 993 |
+
# Register the built-in cache implementations under their ``name`` keys.
for c in (
    BaseCache,
    MMapCache,
    BytesCache,
    ReadAheadCache,
    BlockCache,
    FirstChunkCache,
    AllBytes,
    KnownPartsOfAFile,
    BackgroundBlockCache,
):
    register_cache(c)
|
venv/lib/python3.10/site-packages/fsspec/callbacks.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import wraps
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Callback:
    """
    Base class and interface for callback mechanism

    This class can be used directly for monitoring file transfers by
    providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument,
    below), or subclassed for more specialised behaviour.

    Parameters
    ----------
    size: int (optional)
        Nominal quantity for the value that corresponds to a complete
        transfer, e.g., total number of tiles or total number of
        bytes
    value: int (0)
        Starting internal counter value
    hooks: dict or None
        A dict of named functions to be called on each update. The signature
        of these must be ``f(size, value, **kwargs)``
    """

    def __init__(self, size=None, value=0, hooks=None, **kwargs):
        self.size = size
        self.value = value
        self.kw = kwargs
        self.hooks = hooks or {}

    def __enter__(self):
        return self

    def __exit__(self, *exc_args):
        self.close()

    def close(self):
        """Close callback."""

    def branched(self, path_1, path_2, **kwargs):
        """
        Return callback for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The function returns a callback
        that has to be passed to the child method, e.g., put_file,
        as `callback=` argument.

        The implementation uses `callback.branch` for compatibility.
        When implementing callbacks, it is recommended to override this function instead
        of `branch` and avoid calling `super().branched(...)`.

        Prefer using this function over `branch`.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        **kwargs:
            Arbitrary keyword arguments

        Returns
        -------
        callback: Callback
            A callback instance to be passed to the child method
        """
        self.branch(path_1, path_2, kwargs)
        # branch() mutates kwargs in place; whatever "callback" it put there
        # (if anything) is handed to the child, defaulting to the no-op one
        return kwargs.pop("callback", DEFAULT_CALLBACK)

    def branch_coro(self, fn):
        """
        Wraps a coroutine, and pass a new child callback to it.
        """

        @wraps(fn)
        async def func(path1, path2: str, **kwargs):
            with self.branched(path1, path2, **kwargs) as child:
                return await fn(path1, path2, callback=child, **kwargs)

        return func

    def set_size(self, size):
        """
        Set the internal maximum size attribute

        Usually called if not initially set at instantiation. Note that this
        triggers a ``call()``.

        Parameters
        ----------
        size: int
        """
        self.size = size
        self.call()

    def absolute_update(self, value):
        """
        Set the internal value state

        Triggers ``call()``

        Parameters
        ----------
        value: int
        """
        self.value = value
        self.call()

    def relative_update(self, inc=1):
        """
        Delta increment the internal counter

        Triggers ``call()``

        Parameters
        ----------
        inc: int
        """
        self.value += inc
        self.call()

    def call(self, hook_name=None, **kwargs):
        """
        Execute hook(s) with current state

        Each function is passed the internal size and current value

        Parameters
        ----------
        hook_name: str or None
            If given, execute on this hook
        kwargs: passed on to (all) hook(s)
        """
        if not self.hooks:
            return None
        merged = {**self.kw, **kwargs}
        if hook_name:
            if hook_name not in self.hooks:
                return None
            return self.hooks[hook_name](self.size, self.value, **merged)
        for hook in self.hooks.values():
            hook(self.size, self.value, **merged)

    def wrap(self, iterable):
        """
        Wrap an iterable to call ``relative_update`` on each iterations

        Parameters
        ----------
        iterable: Iterable
            The iterable that is being wrapped
        """
        for element in iterable:
            self.relative_update()
            yield element

    def branch(self, path_1, path_2, kwargs):
        """
        Set callbacks for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The passed kwargs are
        to be *mutated* to add ``callback=``, if this class supports branching
        to children.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        kwargs: dict
            arguments passed to child method, e.g., put_file.

        Returns
        -------

        """
        return None

    def no_op(self, *_, **__):
        pass

    def __getattr__(self, item):
        """
        If undefined methods are called on this class, nothing happens
        """
        return self.no_op

    @classmethod
    def as_callback(cls, maybe_callback=None):
        """Transform callback=... into Callback instance

        For the special value of ``None``, return the global instance of
        ``NoOpCallback``. This is an alternative to including
        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
        """
        return DEFAULT_CALLBACK if maybe_callback is None else maybe_callback
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
class NoOpCallback(Callback):
    """
    A Callback implementation that deliberately ignores every event.
    """

    def call(self, *args, **kwargs):
        # swallow all updates; no hooks are ever executed
        return None
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
class DotPrinterCallback(Callback):
    """
    Minimal demonstration Callback that prints one character per event.

    The outer (parent) transfer prints ``#`` by default while branched
    child transfers print ``.``, showing how `branch` can hand a
    differently-configured callback to children.
    """

    def __init__(self, chr_to_print="#", **kwargs):
        self.chr = chr_to_print
        super().__init__(**kwargs)

    def branch(self, path_1, path_2, kwargs):
        """Mutate kwargs so child transfers get a "."-printing instance"""
        kwargs["callback"] = DotPrinterCallback(".")

    def call(self, **kwargs):
        """Emit this instance's character without a newline"""
        print(self.chr, end="")
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
class TqdmCallback(Callback):
    """
    A callback to display a progress bar using tqdm

    Parameters
    ----------
    tqdm_kwargs : dict, (optional)
        Any argument accepted by the tqdm constructor.
        See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
        Will be forwarded to `tqdm_cls`.
    tqdm_cls: (optional)
        subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.

    Examples
    --------
    >>> import fsspec
    >>> from fsspec.callbacks import TqdmCallback
    >>> fs = fsspec.filesystem("memory")
    >>> path2distant_data = "/your-path"
    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(),
        )

    You can forward args to tqdm using the ``tqdm_kwargs`` parameter.

    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
        )

    You can also customize the progress bar by passing a subclass of `tqdm`.

    .. code-block:: python

        class TqdmFormat(tqdm):
            '''Provides a `total_time` format parameter'''
            @property
            def format_dict(self):
                d = super().format_dict
                total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
                d.update(total_time=self.format_interval(total_time) + " in total")
                return d

    >>> with TqdmCallback(
            tqdm_kwargs={
                "desc": "desc",
                "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
            },
            tqdm_cls=TqdmFormat,
        ) as callback:
            fs.upload(".", path2distant_data, recursive=True, callback=callback)
    """

    def __init__(self, tqdm_kwargs=None, *args, **kwargs):
        # Import lazily so tqdm is only required when this callback is used.
        try:
            from tqdm import tqdm

        except ImportError as exce:
            raise ImportError(
                "Using TqdmCallback requires tqdm to be installed"
            ) from exce

        # ``tqdm_cls`` is popped from kwargs so it is not passed to Callback.
        self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
        self._tqdm_kwargs = tqdm_kwargs or {}
        # The bar itself is created lazily, on the first ``call``.
        self.tqdm = None
        super().__init__(*args, **kwargs)

    def call(self, *args, **kwargs):
        # Lazily create the progress bar on first use.
        if self.tqdm is None:
            self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
        # Re-sync total in case self.size changed after the bar was created.
        self.tqdm.total = self.size
        # Advance by the delta between our value and tqdm's current count.
        self.tqdm.update(self.value - self.tqdm.n)

    def close(self):
        # Idempotent: only closes a bar that exists, then forgets it.
        if self.tqdm is not None:
            self.tqdm.close()
            self.tqdm = None

    def __del__(self):
        # Best-effort cleanup when the callback is garbage-collected.
        return self.close()
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
# Module-level no-op singleton used when callers pass ``callback=None``.
# ``_DEFAULT_CALLBACK`` is kept as an alias (presumably for backwards
# compatibility with older imports — confirm before removing).
DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
|
venv/lib/python3.10/site-packages/fsspec/compression.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Helper functions for a standard streaming compression API"""
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
from zipfile import ZipFile
|
| 5 |
+
|
| 6 |
+
import fsspec.utils
|
| 7 |
+
from fsspec.spec import AbstractBufferedFile
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def noop_file(file, mode, **kwargs):
    """Identity "compression": return the input file object unchanged."""
    return file
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# TODO: files should also be available as contexts
# should be functions of the form func(infile, mode=, **kwargs) -> file-like
# Registry of compression schemes by name; ``None`` means "no compression".
compr = {None: noop_file}
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.
    """
    extension_list = [extensions] if isinstance(extensions, str) else extensions

    # Validate before mutating anything: refuse duplicates unless forced.
    if not force and name in compr:
        raise ValueError(f"Duplicate compression registration: {name}")

    for ext in extension_list:
        if not force and ext in fsspec.utils.compressions:
            raise ValueError(f"Duplicate compression file extension: {ext} ({name})")

    compr[name] = callback
    for ext in extension_list:
        fsspec.utils.compressions[ext] = name
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def unzip(infile, mode="rb", filename=None, **kwargs):
    """Layer zip (de)compression over a file object.

    In write mode, a new archive is created around ``infile`` containing a
    single member (default name ``"file"``); closing the member also closes
    the archive. In read mode, the named member (or the first entry in the
    archive) is opened for reading.
    """
    if "r" in mode:
        archive = ZipFile(infile)
        member = filename if filename is not None else archive.namelist()[0]
        return archive.open(member, mode="r", **kwargs)

    # Write path: patch close() so the archive is finalised with the member.
    archive = ZipFile(infile, mode="w", **kwargs)
    handle = archive.open(filename or "file", mode="w")
    handle.close = lambda closer=handle.close: closer() or archive.close()
    return handle
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
register_compression("zip", unzip, "zip")

# bz2 is stdlib but may be missing if Python was built without libbz2.
try:
    from bz2 import BZ2File
except ImportError:
    pass
else:
    register_compression("bz2", BZ2File, "bz2")

# Prefer python-isal's igzip for "gzip" when it is installed; otherwise
# fall back to the stdlib GzipFile.
try:  # pragma: no cover
    from isal import igzip

    def isal(infile, mode="rb", **kwargs):
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")
except ImportError:
    from gzip import GzipFile

    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )

# lzma is stdlib but optional (depends on liblzma at build time).
try:
    from lzma import LZMAFile

    register_compression("lzma", LZMAFile, "lzma")
    register_compression("xz", LZMAFile, "xz")
except ImportError:
    pass

# If lzmaffi is installed, it overrides the stdlib lzma registrations
# (note force=True).
try:
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
except ImportError:
    pass
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class SnappyFile(AbstractBufferedFile):
    """Buffered file wrapper applying snappy stream (de)compression."""

    def __init__(self, infile, mode, **kwargs):
        import snappy

        # size is a large dummy value; the true stream length is unknown.
        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        # Reading decompresses the underlying stream; writing compresses.
        self.codec = (
            snappy.StreamDecompressor() if "r" in mode else snappy.StreamCompressor()
        )

    def _upload_chunk(self, final=False):
        """Compress the current write buffer into the underlying file."""
        self.buffer.seek(0)
        self.infile.write(self.codec.add_chunk(self.buffer.read()))
        return True

    def seek(self, loc, whence=0):
        # Snappy streams cannot be randomly accessed.
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        return False

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        return self.codec.decompress(self.infile.read(end - start))
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# Optional third-party codecs: each is registered only if importable.
try:
    import snappy

    # Probe the API; some broken installs import but cannot compress.
    snappy.compress(b"")
    # Snappy may use the .sz file extension, but this is not part of the
    # standard implementation.
    register_compression("snappy", SnappyFile, [])

except (ImportError, NameError, AttributeError):
    pass

try:
    import lz4.frame

    register_compression("lz4", lz4.frame.open, "lz4")
except ImportError:
    pass

try:
    # Python 3.14+ ships zstd in the stdlib ``compression`` package;
    # earlier versions can use the ``backports`` package instead.
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd

    register_compression("zstd", zstd.ZstdFile, "zst")
except ImportError:
    # Fall back to the third-party ``zstandard`` bindings.
    try:
        import zstandard as zstd

        def zstandard_file(infile, mode="rb"):
            if "r" in mode:
                cctx = zstd.ZstdDecompressor()
                return cctx.stream_reader(infile)
            else:
                cctx = zstd.ZstdCompressor(level=10)
                return cctx.stream_writer(infile)

        register_compression("zstd", zstandard_file, "zst")
    except ImportError:
        pass
# Fix: removed a redundant duplicate ``pass`` statement (dead code) that
# followed the inner except block in the original.
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def available_compressions():
    """Return a list of the implemented compressions."""
    # Iterating the registry yields its keys (the registered names).
    return [*compr]
|
venv/lib/python3.10/site-packages/fsspec/config.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import configparser
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import warnings
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
# Global configuration: protocol name -> dict of default kwargs.
conf: dict[str, dict[str, Any]] = {}
default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
# Config directory can be overridden via the FSSPEC_CONFIG_DIR env var.
conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def set_conf_env(conf_dict, envdict=os.environ):
    """Set config values from environment variables

    Looks for variables of the form ``FSSPEC_<protocol>`` and
    ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
    as a json dictionary and used to ``update`` the config of the
    corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
    attempt to convert the string value, but the kwarg keys will be lower-cased.

    The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
    ``FSSPEC_<protocol>`` ones.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    deferred_kwarg_keys = []
    for key in envdict:
        # "FSSPEC_X..." with a non-underscore 8th char is a protocol variable.
        if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
            if key.count("_") > 1:
                # FSSPEC_<protocol>_<kwarg>: handled in the second pass below
                # so these always win over the dict-style values.
                deferred_kwarg_keys.append(key)
                continue
            try:
                value = json.loads(envdict[key])
            except json.decoder.JSONDecodeError as ex:
                warnings.warn(
                    f"Ignoring environment variable {key} due to a parse failure: {ex}"
                )
            else:
                if not isinstance(value, dict):
                    warnings.warn(
                        f"Ignoring environment variable {key} due to not being a dict:"
                        f" {type(value)}"
                    )
                else:
                    _, proto = key.split("_", 1)
                    conf_dict.setdefault(proto.lower(), {}).update(value)
        elif key.startswith("FSSPEC"):
            warnings.warn(
                f"Ignoring environment variable {key} due to having an unexpected name"
            )

    for key in deferred_kwarg_keys:
        _, proto, kwarg = key.split("_", 2)
        conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def set_conf_files(cdir, conf_dict):
    """Set config values from files

    Scans for INI and JSON files in the given directory, and uses their
    contents to set the config. In case of repeated values, later values
    win.

    In the case of INI files, all values are strings, and these will not
    be converted.

    Parameters
    ----------
    cdir : str
        Directory to search
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return
    # Sorted so that "later" (alphabetically) files override earlier ones.
    for fn in sorted(os.listdir(cdir)):
        path = os.path.join(cdir, fn)
        if fn.endswith(".ini"):
            parser = configparser.ConfigParser()
            parser.read(path)
            for section in parser:
                # "DEFAULT" is configparser's implicit section, not a protocol.
                if section == "DEFAULT":
                    continue
                conf_dict.setdefault(section, {}).update(dict(parser[section]))
        if fn.endswith(".json"):
            with open(path) as f:
                js = json.load(f)
            for key in js:
                conf_dict.setdefault(key, {}).update(dict(js[key]))
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def apply_config(cls, kwargs, conf_dict=None):
    """Supply default values for kwargs when instantiating class

    Augments the passed kwargs, by finding entries in the config dict
    which match the classes ``.protocol`` attribute (one or more str)

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration

    Returns
    -------
    dict : the modified set of kwargs
    """
    if conf_dict is None:
        conf_dict = conf
    if isinstance(cls.protocol, (tuple, list)):
        protos = cls.protocol
    else:
        protos = [cls.protocol]
    out = {}
    for proto in protos:
        # default kwargs from the current state of the config
        out.update(conf_dict.get(proto, {}))
    # explicit kwargs always win
    out.update(**kwargs)
    return out
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# Populate the global config at import time: files first, then environment
# variables (env vars therefore take precedence over file values).
set_conf_files(conf_dir, conf)
set_conf_env(conf)
|
venv/lib/python3.10/site-packages/fsspec/conftest.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import subprocess
|
| 4 |
+
import sys
|
| 5 |
+
import time
|
| 6 |
+
from collections import deque
|
| 7 |
+
from collections.abc import Generator, Sequence
|
| 8 |
+
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
import fsspec
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem.

    The shared in-memory store and pseudo-directory list are cleared both
    before and after the test, so state cannot leak between tests.
    """
    m = fsspec.filesystem("memory")
    m.store.clear()
    m.pseudo_dirs.clear()
    m.pseudo_dirs.append("")  # the root pseudo-directory must always exist
    try:
        yield m
    finally:
        m.store.clear()
        m.pseudo_dirs.clear()
        m.pseudo_dirs.append("")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class InstanceCacheInspector:
    """
    Helper class to inspect instance caches of filesystem classes in tests.
    """

    @staticmethod
    def _iter_fs_classes():
        """Yield AbstractFileSystem and all imported subclasses, BFS order."""
        pending = deque([fsspec.spec.AbstractFileSystem])
        while pending:
            cls = pending.popleft()
            yield cls
            pending.extend(cls.__subclasses__())

    def clear(self) -> None:
        """
        Clear instance caches of all currently imported filesystem classes.
        """
        for cls in self._iter_fs_classes():
            cls.clear_instance_cache()

    def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]:
        """
        Gather counts of filesystem instances in the instance caches
        of all currently imported filesystem classes.

        Parameters
        ----------
        omit_zero:
            Whether to omit instance types with no cached instances.
        """
        counts: dict[str, int] = {}
        for cls in self._iter_fs_classes():
            n_cached = len(cls._cache)  # there is no public interface for the cache
            # note: skip intermediate AbstractFileSystem subclasses
            # if they proxy the protocol attribute via a property.
            if not isinstance(cls.protocol, (Sequence, str)):
                continue
            key = cls.protocol if isinstance(cls.protocol, str) else cls.protocol[0]
            if n_cached or not omit_zero:
                counts[key] = n_cached
        return counts
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@pytest.fixture(scope="function", autouse=True)
def instance_caches() -> Generator[InstanceCacheInspector, None, None]:
    """
    Fixture to ensure empty filesystem instance caches before and after a test.

    Used by default for all tests.
    Clears caches of all imported filesystem classes.
    Can be used to write test assertions about instance caches.

    Usage:

        def test_something(instance_caches):
            # Test code here
            fsspec.open("file://abc")
            fsspec.open("memory://foo/bar")

            # Test assertion
            assert instance_caches.gather_counts() == {"file": 1, "memory": 1}

    Returns
    -------
    instance_caches: An instance cache inspector for clearing and inspecting caches.
    """
    ic = InstanceCacheInspector()

    # Clear before the test, and again afterwards regardless of outcome.
    ic.clear()
    try:
        yield ic
    finally:
        ic.clear()
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
@pytest.fixture(scope="function")
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.

    Starts a pyftpdlib server as a subprocess serving ``tmpdir`` and yields
    the connection tuple ``(host, port, username, password)``.
    """
    pytest.importorskip("pyftpdlib")

    d = str(tmpdir)
    # Seed the served directory with a known file.
    with open(os.path.join(d, "out"), "wb") as f:
        f.write(b"hello" * 10000)
    P = subprocess.Popen(
        [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
    )
    try:
        # Give the server a moment to start listening before the test runs.
        time.sleep(1)
        # NOTE(review): 2121 is presumably pyftpdlib's default port — confirm.
        yield "localhost", 2121, "user", "pass"
    finally:
        P.terminate()
        P.wait()
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            # Best-effort cleanup; the OS temp reaper will catch leftovers.
            pass
|
venv/lib/python3.10/site-packages/fsspec/core.py
ADDED
|
@@ -0,0 +1,760 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import io
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
from glob import has_magic
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# for backwards compat, we export cache things from here too
|
| 11 |
+
from fsspec.caching import ( # noqa: F401
|
| 12 |
+
BaseCache,
|
| 13 |
+
BlockCache,
|
| 14 |
+
BytesCache,
|
| 15 |
+
MMapCache,
|
| 16 |
+
ReadAheadCache,
|
| 17 |
+
caches,
|
| 18 |
+
)
|
| 19 |
+
from fsspec.compression import compr
|
| 20 |
+
from fsspec.config import conf
|
| 21 |
+
from fsspec.registry import available_protocols, filesystem, get_filesystem_class
|
| 22 |
+
from fsspec.utils import (
|
| 23 |
+
_unstrip_protocol,
|
| 24 |
+
build_name_function,
|
| 25 |
+
infer_compression,
|
| 26 |
+
stringify_path,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger("fsspec")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-system, which
    are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        # Resolve the compression scheme (may be inferred from the path).
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        # Stack of layered file objects created by __enter__ (raw first).
        self.fobjects = []

    def __reduce__(self):
        # Pickle by constructor arguments only; open handles are not pickled.
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        """Open the path and return the outermost (text/compression) layer."""
        # Always open the raw layer in binary; text is layered on top below.
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        try:
            f = self.fs.open(self.path, mode=mode)
        except FileNotFoundError as e:
            # An unexpanded glob pattern is a common cause of this failure.
            if has_magic(self.path):
                raise FileNotFoundError(
                    "%s not found. The URL contains glob characters: you maybe needed\n"
                    "to pass expand=True in fsspec.open() or the storage_options of \n"
                    "your library. You can also set the config value 'open_expand'\n"
                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
                    self.path,
                ) from e
            raise

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        # The path with the filesystem's protocol prefix restored.
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        # Close outermost-first so wrappers flush into the layer beneath them.
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
class OpenFiles(list):
    """List of OpenFile instances

    Can be used in a single context, which opens and closes all of the
    contained files. Normal list access to get the elements works as
    normal.

    A special case is made for caching filesystems - the files will
    be down/uploaded together at the start or end of the context, and
    this may happen concurrently, if the target filesystem supports it.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    @staticmethod
    def _find_open_many(fs):
        """Walk the wrapper chain (fs.fs...) for a filesystem with open_many."""
        while fs is not None:
            if hasattr(fs, "open_many"):
                return fs
            fs = getattr(fs, "fs", None)
        return None

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        target = self._find_open_many(self.fs)
        if target is not None:
            # concurrent cache download; or set up for upload
            self.files = target.open_many(self)
            return self.files
        return [of.__enter__() for of in self]

    def __exit__(self, *args):
        fs = self.fs
        [of.__exit__(*args) for of in self]
        if "r" not in self.mode:
            target = self._find_open_many(fs)
            if target is not None:
                # concurrent cache upload
                target.commit_many(self.files)

    def __getitem__(self, item):
        selected = super().__getitem__(item)
        if isinstance(item, slice):
            selected = OpenFiles(selected, mode=self.mode, fs=self.fs)
        return selected

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        If in write mode, this will ensure the target directory exists before
        writing, by calling ``fs.mkdirs(exist_ok=True)``.
    expand: bool
        Passed through to ``get_fs_token_paths``: whether to expand paths
        (glob in read mode, "*" masks in write mode).
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, fs_token, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )
    if fs.protocol == "file":
        # Local FS handles directory creation itself via this attribute.
        fs.auto_mkdir = auto_mkdir
    elif "r" not in mode and auto_mkdir:
        # Pre-create parent directories for write targets; best-effort only.
        parents = {fs._parent(path) for path in paths}
        for parent in parents:
            try:
                fs.makedirs(parent, exist_ok=True)
            except PermissionError:
                pass
    return OpenFiles(
        [
            OpenFile(
                fs,
                path,
                mode=mode,
                compression=compression,
                encoding=encoding,
                errors=errors,
                newline=newline,
            )
            for path in paths
        ],
        mode=mode,
        fs=fs,
    )
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def _un_chain(path, kwargs):
    """Split a possibly ``::``-chained URL into per-layer (url, protocol, kwargs)."""
    # Avoid a circular import
    from fsspec.implementations.chained import ChainedFileSystem

    if "::" in path:
        non_proto_re = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
        known_protocols = set(available_protocols())

        # split on '::', then ensure each bit has a protocol
        bits = []
        for segment in path.split("::"):
            if segment in known_protocols:
                bits.append(segment + "://")
            elif "://" in segment or non_proto_re.match(segment):
                bits.append(segment)
            else:
                bits.append(segment + "://")
    else:
        bits = [path]

    # [[url, protocol, kwargs], ...]
    out = []
    previous_bit = None
    kwargs = kwargs.copy()

    # Walk from the innermost (target) layer outwards.
    for bit in reversed(bits):
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        extra_kwargs = cls._get_kwargs_from_urls(bit)
        kws = kwargs.pop(protocol, {})

        # NOTE: identity check on purpose — only the very first segment
        # (last iterated) absorbs the remaining un-namespaced kwargs.
        if bit is bits[0]:
            kws.update(kwargs)

        kw = dict(
            **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
            **kws,
        )
        bit = cls._strip_protocol(bit)

        if (
            "target_protocol" not in kw
            and issubclass(cls, ChainedFileSystem)
            and not bit
        ):
            # replace bit if we are chaining and no path given
            bit = previous_bit

        out.append((bit, protocol, kw))
        previous_bit = bit

    out.reverse()
    return out
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def url_to_fs(url, **kwargs):
    """
    Turn fully-qualified and potentially chained URL into filesystem instance

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Returns
    -------
    filesystem : FileSystem
        The new filesystem discovered from ``url`` and created with
        ``**kwargs``.
    urlpath : str
        The file-systems-specific URL for ``url``.
    """
    url = stringify_path(url)
    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    known_kwargs = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
    chain = _un_chain(url, kwargs)

    # Reverse iterate the chain, creating a nested target_* structure
    inkwargs = {}
    for i, (urls, protocol, kw) in enumerate(reversed(chain)):
        if i == len(chain) - 1:
            # Outermost layer: its kwargs go straight to the constructor.
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = protocol
        inkwargs["fo"] = urls

    urlpath, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    return fs, urlpath
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
# Module-level default for ``open()``'s ``expand`` argument, read from the
# "open_expand" config key at import time (False when unset).
DEFAULT_EXPAND = conf.get("open_expand", False)
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
def open(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    protocol=None,
    newline=None,
    expand=None,
    **kwargs,
):
    """Given a path or paths, return one ``OpenFile`` object.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath. Prefix with a protocol like ``s3://``
        to read from alternative filesystems. Should not include glob
        character(s).
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    expand: bool or None
        Whether to regard file paths containing special glob characters as needing
        expansion (finding the first match) or absolute. Setting False allows using
        paths which do embed such characters. If None (default), this argument
        takes its value from the DEFAULT_EXPAND module variable, which takes
        its initial value from the "open_expand" config value at startup, which will
        be False if not set.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> openfile = open('2015-01-01.csv')  # doctest: +SKIP
    >>> openfile = open(
    ...     's3://bucket/2015-01-01.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP
    >>> with openfile as f:
    ...     df = pd.read_csv(f)  # doctest: +SKIP
    ...

    Returns
    -------
    ``OpenFile`` object.

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    if expand is None:
        expand = DEFAULT_EXPAND
    # Delegate to open_files with a single-element list, then unwrap.
    files = open_files(
        urlpath=[urlpath],
        mode=mode,
        compression=compression,
        encoding=encoding,
        errors=errors,
        protocol=protocol,
        newline=newline,
        expand=expand,
        **kwargs,
    )
    if not files:
        raise FileNotFoundError(urlpath)
    return files[0]
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
def open_local(
    url: str | list[str] | Path | list[Path],
    mode: str = "rb",
    **storage_options: dict,
) -> str | list[str]:
    """Open file(s) which can be resolved to local

    For files which either are local, or get downloaded upon open
    (e.g., by file caching)

    Parameters
    ----------
    url: str or list(str)
    mode: str
        Must be read mode
    storage_options:
        passed on to FS for or used by open_files (e.g., compression)
    """
    if "r" not in mode:
        raise ValueError("Can only ensure local files when reading")
    of = open_files(url, mode=mode, **storage_options)
    if not getattr(of[0].fs, "local_file", False):
        raise ValueError(
            "open_local can only be used on a filesystem which"
            " has attribute local_file=True"
        )
    # Opening (and closing) materialises local copies; keep only their names.
    with of as files:
        paths = [f.name for f in files]
    single_input = (isinstance(url, str) and not has_magic(url)) or isinstance(
        url, Path
    )
    return paths[0] if single_input else paths
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
def get_compression(urlpath, compression):
    """Resolve and validate a compression codec name for *urlpath*.

    "infer" is replaced by a codec guessed from the filename suffix; a codec
    that is neither None nor registered in ``compr`` raises ValueError.
    """
    if compression == "infer":
        compression = infer_compression(urlpath)
    if compression is None or compression in compr:
        return compression
    raise ValueError(f"Compression type {compression} not supported")
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
def split_protocol(urlpath):
    """Return protocol, path pair"""
    urlpath = stringify_path(urlpath)
    head, sep, tail = urlpath.partition("://")
    # A one-character "protocol" is a Windows drive letter, not a protocol.
    if sep and len(head) > 1:
        return head, tail
    if urlpath.startswith("data:"):
        return urlpath.split(":", 1)
    return None, urlpath
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
def strip_protocol(urlpath):
    """Return only path part of full URL, according to appropriate backend"""
    protocol = split_protocol(urlpath)[0]
    return get_filesystem_class(protocol)._strip_protocol(urlpath)
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
def expand_paths_if_needed(paths, mode, num, fs, name_function):
    """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
    in them (read mode).

    :param paths: list of paths
    mode: str
        Mode in which to open files.
    num: int
        If opening in writing mode, number of files we expect to create.
    fs: filesystem object
    name_function: callable
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    :return: list of paths
    """
    expanded_paths = []
    paths = list(paths)

    if "w" in mode:  # write mode
        if sum(1 for p in paths if "*" in p) > 1:
            raise ValueError(
                "When writing data, only one filename mask can be specified."
            )
        num = max(num, len(paths))

        for curr_path in paths:
            if "*" in curr_path:
                # expand using name_function
                expanded_paths.extend(_expand_paths(curr_path, name_function, num))
            else:
                expanded_paths.append(curr_path)
        # if we generated more paths than asked for, trim the list
        if len(expanded_paths) > num:
            expanded_paths = expanded_paths[:num]

    else:  # read mode
        for curr_path in paths:
            if has_magic(curr_path):
                # expand using glob
                expanded_paths.extend(fs.glob(curr_path))
            else:
                expanded_paths.append(curr_path)

    return expanded_paths
|
| 630 |
+
|
| 631 |
+
|
| 632 |
+
def get_fs_token_paths(
    urlpath,
    mode="rb",
    num=1,
    name_function=None,
    storage_options=None,
    protocol=None,
    expand=True,
):
    """Filesystem, deterministic token, and paths from a urlpath and options.

    Parameters
    ----------
    urlpath: string or iterable
        Absolute or relative filepath, URL (may include protocols like
        ``s3://``), or globstring pointing to data.
    mode: str, optional
        Mode in which to open files.
    num: int, optional
        If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory
    """
    if isinstance(urlpath, (list, tuple, set)):
        if not urlpath:
            raise ValueError("empty urlpath sequence")
        # Use the first element to determine protocol/chaining for all.
        urlpath0 = stringify_path(next(iter(urlpath)))
    else:
        urlpath0 = stringify_path(urlpath)
    storage_options = storage_options or {}
    if protocol:
        storage_options["protocol"] = protocol
    chain = _un_chain(urlpath0, storage_options or {})
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, nested_protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = nested_protocol
        inkwargs["fo"] = urls
    paths, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    if isinstance(urlpath, (list, tuple, set)):
        pchains = [
            _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
        ]
        if len({pc[1] for pc in pchains}) > 1:
            # FIX: was ValueError("... %s", urlpath) — the %s was never
            # interpolated because ValueError does not format its arguments.
            raise ValueError(f"Protocol mismatch getting fs from {urlpath}")
        paths = [pc[0] for pc in pchains]
    else:
        paths = fs._strip_protocol(paths)
    if isinstance(paths, (list, tuple, set)):
        if expand:
            paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
        elif not isinstance(paths, list):
            paths = list(paths)
    else:
        if ("w" in mode or "x" in mode) and expand:
            paths = _expand_paths(paths, name_function, num)
        elif "*" in paths:
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
        else:
            paths = [paths]

    return fs, fs._fs_token, paths
|
| 708 |
+
|
| 709 |
+
|
| 710 |
+
def _expand_paths(path, name_function, num):
|
| 711 |
+
if isinstance(path, str):
|
| 712 |
+
if path.count("*") > 1:
|
| 713 |
+
raise ValueError("Output path spec must contain exactly one '*'.")
|
| 714 |
+
elif "*" not in path:
|
| 715 |
+
path = os.path.join(path, "*.part")
|
| 716 |
+
|
| 717 |
+
if name_function is None:
|
| 718 |
+
name_function = build_name_function(num - 1)
|
| 719 |
+
|
| 720 |
+
paths = [path.replace("*", name_function(i)) for i in range(num)]
|
| 721 |
+
if paths != sorted(paths):
|
| 722 |
+
logger.warning(
|
| 723 |
+
"In order to preserve order between partitions"
|
| 724 |
+
" paths created with ``name_function`` should "
|
| 725 |
+
"sort to partition order"
|
| 726 |
+
)
|
| 727 |
+
elif isinstance(path, (tuple, list)):
|
| 728 |
+
assert len(path) == num
|
| 729 |
+
paths = list(path)
|
| 730 |
+
else:
|
| 731 |
+
raise ValueError(
|
| 732 |
+
"Path should be either\n"
|
| 733 |
+
"1. A list of paths: ['foo.json', 'bar.json', ...]\n"
|
| 734 |
+
"2. A directory: 'foo/\n"
|
| 735 |
+
"3. A path with a '*' in it: 'foo.*.json'"
|
| 736 |
+
)
|
| 737 |
+
return paths
|
| 738 |
+
|
| 739 |
+
|
| 740 |
+
class PickleableTextIOWrapper(io.TextIOWrapper):
    """A picklable ``io.TextIOWrapper``.

    Plain ``TextIOWrapper`` cannot be pickled; this subclass remembers its
    constructor arguments and reduces to them. Requires that ``buffer`` be
    pickleable, which all instances of AbstractBufferedFile are.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        # Stash the exact constructor arguments for __reduce__.
        self.args = buffer, encoding, errors, newline, line_buffering, write_through
        super().__init__(*self.args)

    def __reduce__(self):
        # Rebuild by calling the class with the saved arguments.
        return PickleableTextIOWrapper, self.args
|