Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/caching.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/fuse.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/json.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/mapping.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/registry.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/__pycache__/utils.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/arrow.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/asyn_wrapper.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cached.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dask.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/data.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dirfs.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/github.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/http.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/local.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/memory.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/reference.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/smb.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/zip.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/asyn_wrapper.py +99 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/cache_mapper.py +75 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/cache_metadata.py +232 -0
- .venv/lib/python3.11/site-packages/fsspec/implementations/cached.py +929 -0
.venv/lib/python3.11/site-packages/fsspec/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (2.19 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc
ADDED
|
Binary file (640 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc
ADDED
|
Binary file (4.71 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc
ADDED
|
Binary file (52.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/caching.cpython-311.pyc
ADDED
|
Binary file (41.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc
ADDED
|
Binary file (13.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc
ADDED
|
Binary file (8.05 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc
ADDED
|
Binary file (6.67 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc
ADDED
|
Binary file (3.4 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc
ADDED
|
Binary file (32.4 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc
ADDED
|
Binary file (4.72 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc
ADDED
|
Binary file (955 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/fuse.cpython-311.pyc
ADDED
|
Binary file (17.2 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc
ADDED
|
Binary file (22.2 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc
ADDED
|
Binary file (23.3 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/json.cpython-311.pyc
ADDED
|
Binary file (8.16 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/mapping.cpython-311.pyc
ADDED
|
Binary file (14 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc
ADDED
|
Binary file (17.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/registry.cpython-311.pyc
ADDED
|
Binary file (11.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc
ADDED
|
Binary file (5.11 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (32.2 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__init__.py
ADDED
|
File without changes
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (195 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/arrow.cpython-311.pyc
ADDED
|
Binary file (15.6 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/asyn_wrapper.cpython-311.pyc
ADDED
|
Binary file (5.1 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc
ADDED
|
Binary file (4.68 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc
ADDED
|
Binary file (12.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cached.cpython-311.pyc
ADDED
|
Binary file (50 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dask.cpython-311.pyc
ADDED
|
Binary file (7.76 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/data.cpython-311.pyc
ADDED
|
Binary file (3.5 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc
ADDED
|
Binary file (20.1 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dirfs.cpython-311.pyc
ADDED
|
Binary file (27 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc
ADDED
|
Binary file (20.1 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc
ADDED
|
Binary file (6.65 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/github.cpython-311.pyc
ADDED
|
Binary file (12.9 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/http.cpython-311.pyc
ADDED
|
Binary file (45.2 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc
ADDED
|
Binary file (7.29 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc
ADDED
|
Binary file (10.2 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/local.cpython-311.pyc
ADDED
|
Binary file (26.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/memory.cpython-311.pyc
ADDED
|
Binary file (16.5 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/reference.cpython-311.pyc
ADDED
|
Binary file (73.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc
ADDED
|
Binary file (10.9 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/smb.cpython-311.pyc
ADDED
|
Binary file (20.3 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc
ADDED
|
Binary file (5.37 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc
ADDED
|
Binary file (24.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/zip.cpython-311.pyc
ADDED
|
Binary file (9.59 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/fsspec/implementations/asyn_wrapper.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import functools
|
| 3 |
+
import inspect
|
| 4 |
+
|
| 5 |
+
from fsspec.asyn import AsyncFileSystem
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def async_wrapper(func, obj=None):
|
| 9 |
+
"""
|
| 10 |
+
Wraps a synchronous function to make it awaitable.
|
| 11 |
+
|
| 12 |
+
Parameters
|
| 13 |
+
----------
|
| 14 |
+
func : callable
|
| 15 |
+
The synchronous function to wrap.
|
| 16 |
+
obj : object, optional
|
| 17 |
+
The instance to bind the function to, if applicable.
|
| 18 |
+
|
| 19 |
+
Returns
|
| 20 |
+
-------
|
| 21 |
+
coroutine
|
| 22 |
+
An awaitable version of the function.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
@functools.wraps(func)
|
| 26 |
+
async def wrapper(*args, **kwargs):
|
| 27 |
+
return await asyncio.to_thread(func, *args, **kwargs)
|
| 28 |
+
|
| 29 |
+
return wrapper
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class AsyncFileSystemWrapper(AsyncFileSystem):
|
| 33 |
+
"""
|
| 34 |
+
A wrapper class to convert a synchronous filesystem into an asynchronous one.
|
| 35 |
+
|
| 36 |
+
This class takes an existing synchronous filesystem implementation and wraps all
|
| 37 |
+
its methods to provide an asynchronous interface.
|
| 38 |
+
|
| 39 |
+
Parameters
|
| 40 |
+
----------
|
| 41 |
+
sync_fs : AbstractFileSystem
|
| 42 |
+
The synchronous filesystem instance to wrap.
|
| 43 |
+
"""
|
| 44 |
+
|
| 45 |
+
def __init__(self, sync_fs, *args, **kwargs):
|
| 46 |
+
super().__init__(*args, **kwargs)
|
| 47 |
+
self.asynchronous = True
|
| 48 |
+
self.sync_fs = sync_fs
|
| 49 |
+
self.protocol = self.sync_fs.protocol
|
| 50 |
+
self._wrap_all_sync_methods()
|
| 51 |
+
|
| 52 |
+
@property
|
| 53 |
+
def fsid(self):
|
| 54 |
+
return f"async_{self.sync_fs.fsid}"
|
| 55 |
+
|
| 56 |
+
def _wrap_all_sync_methods(self):
|
| 57 |
+
"""
|
| 58 |
+
Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
|
| 59 |
+
"""
|
| 60 |
+
excluded_methods = {"open"}
|
| 61 |
+
for method_name in dir(self.sync_fs):
|
| 62 |
+
if method_name.startswith("_") or method_name in excluded_methods:
|
| 63 |
+
continue
|
| 64 |
+
|
| 65 |
+
attr = inspect.getattr_static(self.sync_fs, method_name)
|
| 66 |
+
if isinstance(attr, property):
|
| 67 |
+
continue
|
| 68 |
+
|
| 69 |
+
method = getattr(self.sync_fs, method_name)
|
| 70 |
+
if callable(method) and not asyncio.iscoroutinefunction(method):
|
| 71 |
+
async_method = async_wrapper(method, obj=self)
|
| 72 |
+
setattr(self, f"_{method_name}", async_method)
|
| 73 |
+
|
| 74 |
+
@classmethod
|
| 75 |
+
def wrap_class(cls, sync_fs_class):
|
| 76 |
+
"""
|
| 77 |
+
Create a new class that can be used to instantiate an AsyncFileSystemWrapper
|
| 78 |
+
with lazy instantiation of the underlying synchronous filesystem.
|
| 79 |
+
|
| 80 |
+
Parameters
|
| 81 |
+
----------
|
| 82 |
+
sync_fs_class : type
|
| 83 |
+
The class of the synchronous filesystem to wrap.
|
| 84 |
+
|
| 85 |
+
Returns
|
| 86 |
+
-------
|
| 87 |
+
type
|
| 88 |
+
A new class that wraps the provided synchronous filesystem class.
|
| 89 |
+
"""
|
| 90 |
+
|
| 91 |
+
class GeneratedAsyncFileSystemWrapper(cls):
|
| 92 |
+
def __init__(self, *args, **kwargs):
|
| 93 |
+
sync_fs = sync_fs_class(*args, **kwargs)
|
| 94 |
+
super().__init__(sync_fs)
|
| 95 |
+
|
| 96 |
+
GeneratedAsyncFileSystemWrapper.__name__ = (
|
| 97 |
+
f"Async{sync_fs_class.__name__}Wrapper"
|
| 98 |
+
)
|
| 99 |
+
return GeneratedAsyncFileSystemWrapper
|
.venv/lib/python3.11/site-packages/fsspec/implementations/cache_mapper.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import abc
|
| 4 |
+
import hashlib
|
| 5 |
+
|
| 6 |
+
from fsspec.implementations.local import make_path_posix
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class AbstractCacheMapper(abc.ABC):
|
| 10 |
+
"""Abstract super-class for mappers from remote URLs to local cached
|
| 11 |
+
basenames.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
@abc.abstractmethod
|
| 15 |
+
def __call__(self, path: str) -> str: ...
|
| 16 |
+
|
| 17 |
+
def __eq__(self, other: object) -> bool:
|
| 18 |
+
# Identity only depends on class. When derived classes have attributes
|
| 19 |
+
# they will need to be included.
|
| 20 |
+
return isinstance(other, type(self))
|
| 21 |
+
|
| 22 |
+
def __hash__(self) -> int:
|
| 23 |
+
# Identity only depends on class. When derived classes have attributes
|
| 24 |
+
# they will need to be included.
|
| 25 |
+
return hash(type(self))
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class BasenameCacheMapper(AbstractCacheMapper):
|
| 29 |
+
"""Cache mapper that uses the basename of the remote URL and a fixed number
|
| 30 |
+
of directory levels above this.
|
| 31 |
+
|
| 32 |
+
The default is zero directory levels, meaning different paths with the same
|
| 33 |
+
basename will have the same cached basename.
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
def __init__(self, directory_levels: int = 0):
|
| 37 |
+
if directory_levels < 0:
|
| 38 |
+
raise ValueError(
|
| 39 |
+
"BasenameCacheMapper requires zero or positive directory_levels"
|
| 40 |
+
)
|
| 41 |
+
self.directory_levels = directory_levels
|
| 42 |
+
|
| 43 |
+
# Separator for directories when encoded as strings.
|
| 44 |
+
self._separator = "_@_"
|
| 45 |
+
|
| 46 |
+
def __call__(self, path: str) -> str:
|
| 47 |
+
path = make_path_posix(path)
|
| 48 |
+
prefix, *bits = path.rsplit("/", self.directory_levels + 1)
|
| 49 |
+
if bits:
|
| 50 |
+
return self._separator.join(bits)
|
| 51 |
+
else:
|
| 52 |
+
return prefix # No separator found, simple filename
|
| 53 |
+
|
| 54 |
+
def __eq__(self, other: object) -> bool:
|
| 55 |
+
return super().__eq__(other) and self.directory_levels == other.directory_levels
|
| 56 |
+
|
| 57 |
+
def __hash__(self) -> int:
|
| 58 |
+
return super().__hash__() ^ hash(self.directory_levels)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class HashCacheMapper(AbstractCacheMapper):
|
| 62 |
+
"""Cache mapper that uses a hash of the remote URL."""
|
| 63 |
+
|
| 64 |
+
def __call__(self, path: str) -> str:
|
| 65 |
+
return hashlib.sha256(path.encode()).hexdigest()
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def create_cache_mapper(same_names: bool) -> AbstractCacheMapper:
|
| 69 |
+
"""Factory method to create cache mapper for backward compatibility with
|
| 70 |
+
``CachingFileSystem`` constructor using ``same_names`` kwarg.
|
| 71 |
+
"""
|
| 72 |
+
if same_names:
|
| 73 |
+
return BasenameCacheMapper()
|
| 74 |
+
else:
|
| 75 |
+
return HashCacheMapper()
|
.venv/lib/python3.11/site-packages/fsspec/implementations/cache_metadata.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import pickle
|
| 5 |
+
import time
|
| 6 |
+
from typing import TYPE_CHECKING
|
| 7 |
+
|
| 8 |
+
from fsspec.utils import atomic_write
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
import ujson as json
|
| 12 |
+
except ImportError:
|
| 13 |
+
if not TYPE_CHECKING:
|
| 14 |
+
import json
|
| 15 |
+
|
| 16 |
+
if TYPE_CHECKING:
|
| 17 |
+
from typing import Any, Dict, Iterator, Literal
|
| 18 |
+
|
| 19 |
+
from typing_extensions import TypeAlias
|
| 20 |
+
|
| 21 |
+
from .cached import CachingFileSystem
|
| 22 |
+
|
| 23 |
+
Detail: TypeAlias = Dict[str, Any]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class CacheMetadata:
|
| 27 |
+
"""Cache metadata.
|
| 28 |
+
|
| 29 |
+
All reading and writing of cache metadata is performed by this class,
|
| 30 |
+
accessing the cached files and blocks is not.
|
| 31 |
+
|
| 32 |
+
Metadata is stored in a single file per storage directory in JSON format.
|
| 33 |
+
For backward compatibility, also reads metadata stored in pickle format
|
| 34 |
+
which is converted to JSON when next saved.
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
def __init__(self, storage: list[str]):
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
Parameters
|
| 41 |
+
----------
|
| 42 |
+
storage: list[str]
|
| 43 |
+
Directories containing cached files, must be at least one. Metadata
|
| 44 |
+
is stored in the last of these directories by convention.
|
| 45 |
+
"""
|
| 46 |
+
if not storage:
|
| 47 |
+
raise ValueError("CacheMetadata expects at least one storage location")
|
| 48 |
+
|
| 49 |
+
self._storage = storage
|
| 50 |
+
self.cached_files: list[Detail] = [{}]
|
| 51 |
+
|
| 52 |
+
# Private attribute to force saving of metadata in pickle format rather than
|
| 53 |
+
# JSON for use in tests to confirm can read both pickle and JSON formats.
|
| 54 |
+
self._force_save_pickle = False
|
| 55 |
+
|
| 56 |
+
def _load(self, fn: str) -> Detail:
|
| 57 |
+
"""Low-level function to load metadata from specific file"""
|
| 58 |
+
try:
|
| 59 |
+
with open(fn, "r") as f:
|
| 60 |
+
loaded = json.load(f)
|
| 61 |
+
except ValueError:
|
| 62 |
+
with open(fn, "rb") as f:
|
| 63 |
+
loaded = pickle.load(f)
|
| 64 |
+
for c in loaded.values():
|
| 65 |
+
if isinstance(c.get("blocks"), list):
|
| 66 |
+
c["blocks"] = set(c["blocks"])
|
| 67 |
+
return loaded
|
| 68 |
+
|
| 69 |
+
def _save(self, metadata_to_save: Detail, fn: str) -> None:
|
| 70 |
+
"""Low-level function to save metadata to specific file"""
|
| 71 |
+
if self._force_save_pickle:
|
| 72 |
+
with atomic_write(fn) as f:
|
| 73 |
+
pickle.dump(metadata_to_save, f)
|
| 74 |
+
else:
|
| 75 |
+
with atomic_write(fn, mode="w") as f:
|
| 76 |
+
json.dump(metadata_to_save, f)
|
| 77 |
+
|
| 78 |
+
def _scan_locations(
|
| 79 |
+
self, writable_only: bool = False
|
| 80 |
+
) -> Iterator[tuple[str, str, bool]]:
|
| 81 |
+
"""Yield locations (filenames) where metadata is stored, and whether
|
| 82 |
+
writable or not.
|
| 83 |
+
|
| 84 |
+
Parameters
|
| 85 |
+
----------
|
| 86 |
+
writable: bool
|
| 87 |
+
Set to True to only yield writable locations.
|
| 88 |
+
|
| 89 |
+
Returns
|
| 90 |
+
-------
|
| 91 |
+
Yields (str, str, bool)
|
| 92 |
+
"""
|
| 93 |
+
n = len(self._storage)
|
| 94 |
+
for i, storage in enumerate(self._storage):
|
| 95 |
+
writable = i == n - 1
|
| 96 |
+
if writable_only and not writable:
|
| 97 |
+
continue
|
| 98 |
+
yield os.path.join(storage, "cache"), storage, writable
|
| 99 |
+
|
| 100 |
+
def check_file(
|
| 101 |
+
self, path: str, cfs: CachingFileSystem | None
|
| 102 |
+
) -> Literal[False] | tuple[Detail, str]:
|
| 103 |
+
"""If path is in cache return its details, otherwise return ``False``.
|
| 104 |
+
|
| 105 |
+
If the optional CachingFileSystem is specified then it is used to
|
| 106 |
+
perform extra checks to reject possible matches, such as if they are
|
| 107 |
+
too old.
|
| 108 |
+
"""
|
| 109 |
+
for (fn, base, _), cache in zip(self._scan_locations(), self.cached_files):
|
| 110 |
+
if path not in cache:
|
| 111 |
+
continue
|
| 112 |
+
detail = cache[path].copy()
|
| 113 |
+
|
| 114 |
+
if cfs is not None:
|
| 115 |
+
if cfs.check_files and detail["uid"] != cfs.fs.ukey(path):
|
| 116 |
+
# Wrong file as determined by hash of file properties
|
| 117 |
+
continue
|
| 118 |
+
if cfs.expiry and time.time() - detail["time"] > cfs.expiry:
|
| 119 |
+
# Cached file has expired
|
| 120 |
+
continue
|
| 121 |
+
|
| 122 |
+
fn = os.path.join(base, detail["fn"])
|
| 123 |
+
if os.path.exists(fn):
|
| 124 |
+
return detail, fn
|
| 125 |
+
return False
|
| 126 |
+
|
| 127 |
+
def clear_expired(self, expiry_time: int) -> tuple[list[str], bool]:
|
| 128 |
+
"""Remove expired metadata from the cache.
|
| 129 |
+
|
| 130 |
+
Returns names of files corresponding to expired metadata and a boolean
|
| 131 |
+
flag indicating whether the writable cache is empty. Caller is
|
| 132 |
+
responsible for deleting the expired files.
|
| 133 |
+
"""
|
| 134 |
+
expired_files = []
|
| 135 |
+
for path, detail in self.cached_files[-1].copy().items():
|
| 136 |
+
if time.time() - detail["time"] > expiry_time:
|
| 137 |
+
fn = detail.get("fn", "")
|
| 138 |
+
if not fn:
|
| 139 |
+
raise RuntimeError(
|
| 140 |
+
f"Cache metadata does not contain 'fn' for {path}"
|
| 141 |
+
)
|
| 142 |
+
fn = os.path.join(self._storage[-1], fn)
|
| 143 |
+
expired_files.append(fn)
|
| 144 |
+
self.cached_files[-1].pop(path)
|
| 145 |
+
|
| 146 |
+
if self.cached_files[-1]:
|
| 147 |
+
cache_path = os.path.join(self._storage[-1], "cache")
|
| 148 |
+
self._save(self.cached_files[-1], cache_path)
|
| 149 |
+
|
| 150 |
+
writable_cache_empty = not self.cached_files[-1]
|
| 151 |
+
return expired_files, writable_cache_empty
|
| 152 |
+
|
| 153 |
+
def load(self) -> None:
|
| 154 |
+
"""Load all metadata from disk and store in ``self.cached_files``"""
|
| 155 |
+
cached_files = []
|
| 156 |
+
for fn, _, _ in self._scan_locations():
|
| 157 |
+
if os.path.exists(fn):
|
| 158 |
+
# TODO: consolidate blocks here
|
| 159 |
+
cached_files.append(self._load(fn))
|
| 160 |
+
else:
|
| 161 |
+
cached_files.append({})
|
| 162 |
+
self.cached_files = cached_files or [{}]
|
| 163 |
+
|
| 164 |
+
def on_close_cached_file(self, f: Any, path: str) -> None:
|
| 165 |
+
"""Perform side-effect actions on closing a cached file.
|
| 166 |
+
|
| 167 |
+
The actual closing of the file is the responsibility of the caller.
|
| 168 |
+
"""
|
| 169 |
+
# File must be writeble, so in self.cached_files[-1]
|
| 170 |
+
c = self.cached_files[-1][path]
|
| 171 |
+
if c["blocks"] is not True and len(c["blocks"]) * f.blocksize >= f.size:
|
| 172 |
+
c["blocks"] = True
|
| 173 |
+
|
| 174 |
+
def pop_file(self, path: str) -> str | None:
|
| 175 |
+
"""Remove metadata of cached file.
|
| 176 |
+
|
| 177 |
+
If path is in the cache, return the filename of the cached file,
|
| 178 |
+
otherwise return ``None``. Caller is responsible for deleting the
|
| 179 |
+
cached file.
|
| 180 |
+
"""
|
| 181 |
+
details = self.check_file(path, None)
|
| 182 |
+
if not details:
|
| 183 |
+
return None
|
| 184 |
+
_, fn = details
|
| 185 |
+
if fn.startswith(self._storage[-1]):
|
| 186 |
+
self.cached_files[-1].pop(path)
|
| 187 |
+
self.save()
|
| 188 |
+
else:
|
| 189 |
+
raise PermissionError(
|
| 190 |
+
"Can only delete cached file in last, writable cache location"
|
| 191 |
+
)
|
| 192 |
+
return fn
|
| 193 |
+
|
| 194 |
+
def save(self) -> None:
|
| 195 |
+
"""Save metadata to disk"""
|
| 196 |
+
for (fn, _, writable), cache in zip(self._scan_locations(), self.cached_files):
|
| 197 |
+
if not writable:
|
| 198 |
+
continue
|
| 199 |
+
|
| 200 |
+
if os.path.exists(fn):
|
| 201 |
+
cached_files = self._load(fn)
|
| 202 |
+
for k, c in cached_files.items():
|
| 203 |
+
if k in cache:
|
| 204 |
+
if c["blocks"] is True or cache[k]["blocks"] is True:
|
| 205 |
+
c["blocks"] = True
|
| 206 |
+
else:
|
| 207 |
+
# self.cached_files[*][*]["blocks"] must continue to
|
| 208 |
+
# point to the same set object so that updates
|
| 209 |
+
# performed by MMapCache are propagated back to
|
| 210 |
+
# self.cached_files.
|
| 211 |
+
blocks = cache[k]["blocks"]
|
| 212 |
+
blocks.update(c["blocks"])
|
| 213 |
+
c["blocks"] = blocks
|
| 214 |
+
c["time"] = max(c["time"], cache[k]["time"])
|
| 215 |
+
c["uid"] = cache[k]["uid"]
|
| 216 |
+
|
| 217 |
+
# Files can be added to cache after it was written once
|
| 218 |
+
for k, c in cache.items():
|
| 219 |
+
if k not in cached_files:
|
| 220 |
+
cached_files[k] = c
|
| 221 |
+
else:
|
| 222 |
+
cached_files = cache
|
| 223 |
+
cache = {k: v.copy() for k, v in cached_files.items()}
|
| 224 |
+
for c in cache.values():
|
| 225 |
+
if isinstance(c["blocks"], set):
|
| 226 |
+
c["blocks"] = list(c["blocks"])
|
| 227 |
+
self._save(cache, fn)
|
| 228 |
+
self.cached_files[-1] = cached_files
|
| 229 |
+
|
| 230 |
+
def update_file(self, path: str, detail: Detail) -> None:
|
| 231 |
+
"""Update metadata for specific file in memory, do not save"""
|
| 232 |
+
self.cached_files[-1][path] = detail
|
.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py
ADDED
|
@@ -0,0 +1,929 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import inspect
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
import time
|
| 8 |
+
import weakref
|
| 9 |
+
from shutil import rmtree
|
| 10 |
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar
|
| 11 |
+
|
| 12 |
+
from fsspec import AbstractFileSystem, filesystem
|
| 13 |
+
from fsspec.callbacks import DEFAULT_CALLBACK
|
| 14 |
+
from fsspec.compression import compr
|
| 15 |
+
from fsspec.core import BaseCache, MMapCache
|
| 16 |
+
from fsspec.exceptions import BlocksizeMismatchError
|
| 17 |
+
from fsspec.implementations.cache_mapper import create_cache_mapper
|
| 18 |
+
from fsspec.implementations.cache_metadata import CacheMetadata
|
| 19 |
+
from fsspec.spec import AbstractBufferedFile
|
| 20 |
+
from fsspec.transaction import Transaction
|
| 21 |
+
from fsspec.utils import infer_compression
|
| 22 |
+
|
| 23 |
+
if TYPE_CHECKING:
|
| 24 |
+
from fsspec.implementations.cache_mapper import AbstractCacheMapper
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger("fsspec.cached")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class WriteCachedTransaction(Transaction):
    """Transaction for write-caching filesystems.

    On completion, any locally written files are uploaded in bulk to the
    target filesystem (when committing), and the transaction state on the
    owning filesystem is cleared.
    """

    def complete(self, commit=True):
        """Finish the transaction; upload buffered local files if *commit*."""
        remote_paths = []
        local_paths = []
        for cached_file in self.files:
            remote_paths.append(cached_file.path)
            local_paths.append(cached_file.fn)
        if commit:
            # bulk-upload every locally cached file to its remote path
            self.fs.put(local_paths, remote_paths)
        self.files.clear()
        self.fs._intrans = False
        self.fs._transaction = None
        self.fs = None  # break cycle
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class CachingFileSystem(AbstractFileSystem):
    """Locally caching filesystem, layer over any other FS

    This class implements chunk-wise local storage of remote files, for quick
    access after the initial download. The files are stored in a given
    directory with hashes of URLs for the filenames. If no directory is given,
    a temporary one is used, which should be cleaned up by the OS after the
    process ends. The files themselves are sparse (as implemented in
    :class:`~fsspec.caching.MMapCache`), so only the data which is accessed
    takes up space.

    Restrictions:

    - the block-size must be the same for each access of a given file, unless
      all blocks of the file have already been read
    - caching can only be applied to file-systems which produce files
      derived from fsspec.spec.AbstractBufferedFile ; LocalFileSystem is also
      allowed, for testing
    """

    protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached")

    def __init__(
        self,
        target_protocol=None,
        cache_storage="TMP",
        cache_check=10,
        check_files=False,
        expiry_time=604800,
        target_options=None,
        fs=None,
        same_names: bool | None = None,
        compression=None,
        cache_mapper: AbstractCacheMapper | None = None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        target_protocol: str (optional)
            Target filesystem protocol. Provide either this or ``fs``.
        cache_storage: str or list(str)
            Location to store files. If "TMP", this is a temporary directory,
            and will be cleaned up by the OS when this process ends (or later).
            If a list, each location will be tried in the order given, but
            only the last will be considered writable.
        cache_check: int
            Number of seconds between reload of cache metadata
        check_files: bool
            Whether to explicitly see if the UID of the remote file matches
            the stored one before using. Warning: some file systems such as
            HTTP cannot reliably give a unique hash of the contents of some
            path, so be sure to set this option to False.
        expiry_time: int
            The time in seconds after which a local copy is considered useless.
            Set to falsy to prevent expiry. The default is equivalent to one
            week.
        target_options: dict or None
            Passed to the instantiation of the FS, if fs is None.
        fs: filesystem instance
            The target filesystem to run against. Provide this or ``protocol``.
        same_names: bool (optional)
            By default, target URLs are hashed using a ``HashCacheMapper`` so
            that files from different backends with the same basename do not
            conflict. If this argument is ``true``, a ``BasenameCacheMapper``
            is used instead. Other cache mapper options are available by using
            the ``cache_mapper`` keyword argument. Only one of this and
            ``cache_mapper`` should be specified.
        compression: str (optional)
            To decompress on download. Can be 'infer' (guess from the URL name),
            one of the entries in ``fsspec.compression.compr``, or None for no
            decompression.
        cache_mapper: AbstractCacheMapper (optional)
            The object use to map from original filenames to cached filenames.
            Only one of this and ``same_names`` should be specified.
        """
        super().__init__(**kwargs)
        if fs is None and target_protocol is None:
            raise ValueError(
                "Please provide filesystem instance(fs) or target_protocol"
            )
        # exactly one of fs / target_protocol must be supplied
        if not (fs is None) ^ (target_protocol is None):
            raise ValueError(
                "Both filesystems (fs) and target_protocol may not be both given."
            )
        if cache_storage == "TMP":
            tempdir = tempfile.mkdtemp()
            storage = [tempdir]
            # remove the temporary directory when this instance is collected
            weakref.finalize(self, self._remove_tempdir, tempdir)
        else:
            if isinstance(cache_storage, str):
                storage = [cache_storage]
            else:
                storage = cache_storage
        # only the last storage location is writable; make sure it exists
        os.makedirs(storage[-1], exist_ok=True)
        self.storage = storage
        self.kwargs = target_options or {}
        self.cache_check = cache_check
        self.check_files = check_files
        self.expiry = expiry_time
        self.compression = compression

        # Size of cache in bytes. If None then the size is unknown and will be
        # recalculated the next time cache_size() is called. On writes to the
        # cache this is reset to None.
        self._cache_size = None

        if same_names is not None and cache_mapper is not None:
            raise ValueError(
                "Cannot specify both same_names and cache_mapper in "
                "CachingFileSystem.__init__"
            )
        if cache_mapper is not None:
            self._mapper = cache_mapper
        else:
            self._mapper = create_cache_mapper(
                same_names if same_names is not None else False
            )

        self.target_protocol = (
            target_protocol
            if isinstance(target_protocol, str)
            else (fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0])
        )
        self._metadata = CacheMetadata(self.storage)
        self.load_cache()
        self.fs = fs if fs is not None else filesystem(target_protocol, **self.kwargs)

        def _strip_protocol(path):
            # acts as a method, since each instance has a difference target
            return self.fs._strip_protocol(type(self)._strip_protocol(path))

        # bound per-instance so the wrapped fs's own stripping is applied too
        self._strip_protocol: Callable = _strip_protocol

    @staticmethod
    def _remove_tempdir(tempdir):
        """Best-effort removal of the temporary cache directory (finalizer)."""
        try:
            rmtree(tempdir)
        except Exception:
            pass

    def _mkcache(self):
        """Ensure the writable cache directory exists."""
        os.makedirs(self.storage[-1], exist_ok=True)

    def cache_size(self):
        """Return size of cache in bytes.

        If more than one cache directory is in use, only the size of the last
        one (the writable cache directory) is returned.
        """
        if self._cache_size is None:
            cache_dir = self.storage[-1]
            self._cache_size = filesystem("file").du(cache_dir, withdirs=True)
        return self._cache_size

    def load_cache(self):
        """Read set of stored blocks from file"""
        self._metadata.load()
        self._mkcache()
        self.last_cache = time.time()

    def save_cache(self):
        """Save set of stored blocks from file"""
        self._mkcache()
        self._metadata.save()
        self.last_cache = time.time()
        # size on disk changed, so invalidate the cached total
        self._cache_size = None

    def _check_cache(self):
        """Reload caches if time elapsed or any disappeared"""
        self._mkcache()
        if not self.cache_check:
            # explicitly told not to bother checking
            return
        timecond = time.time() - self.last_cache > self.cache_check
        existcond = all(os.path.exists(storage) for storage in self.storage)
        if timecond or not existcond:
            self.load_cache()

    def _check_file(self, path):
        """Is path in cache and still valid"""
        path = self._strip_protocol(path)
        self._check_cache()
        return self._metadata.check_file(path, self)

    def clear_cache(self):
        """Remove all files and metadata from the cache

        In the case of multiple cache locations, this clears only the last one,
        which is assumed to be the read/write one.
        """
        rmtree(self.storage[-1])
        self.load_cache()
        self._cache_size = None

    def clear_expired_cache(self, expiry_time=None):
        """Remove all expired files and metadata from the cache

        In the case of multiple cache locations, this clears only the last one,
        which is assumed to be the read/write one.

        Parameters
        ----------
        expiry_time: int
            The time in seconds after which a local copy is considered useless.
            If not defined the default is equivalent to the attribute from the
            file caching instantiation.
        """

        if not expiry_time:
            expiry_time = self.expiry

        self._check_cache()

        expired_files, writable_cache_empty = self._metadata.clear_expired(expiry_time)
        for fn in expired_files:
            if os.path.exists(fn):
                os.remove(fn)

        if writable_cache_empty:
            # nothing left: drop and recreate the writable cache dir
            rmtree(self.storage[-1])
            self.load_cache()

        self._cache_size = None

    def pop_from_cache(self, path):
        """Remove cached version of given file

        Deletes local copy of the given (remote) path. If it is found in a cache
        location which is not the last, it is assumed to be read-only, and
        raises PermissionError
        """
        path = self._strip_protocol(path)
        fn = self._metadata.pop_file(path)
        if fn is not None:
            os.remove(fn)
        self._cache_size = None

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Wrap the target _open

        If the whole file exists in the cache, just open it locally and
        return that.

        Otherwise, open the file on the target FS, and make it have a mmap
        cache pointing to the location which we determine, in our cache.
        The ``blocks`` instance is shared, so as the mmap cache instance
        updates, so does the entry in our ``cached_files`` attribute.
        We monkey-patch this file, so that when it closes, we call
        ``close_and_update`` to save the state of the blocks.
        """
        path = self._strip_protocol(path)

        path = self.fs._strip_protocol(path)
        if "r" not in mode:
            # writes are not cached: pass straight through to the target
            return self.fs._open(
                path,
                mode=mode,
                block_size=block_size,
                autocommit=autocommit,
                cache_options=cache_options,
                **kwargs,
            )
        detail = self._check_file(path)
        if detail:
            # file is in cache
            detail, fn = detail
            hash, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                # stored file is complete
                logger.debug("Opening local copy of %s", path)
                return open(fn, mode)
            # TODO: action where partial file exists in read-only cache
            logger.debug("Opening partially cached copy of %s", path)
        else:
            hash = self._mapper(path)
            fn = os.path.join(self.storage[-1], hash)
            blocks = set()
            detail = {
                "original": path,
                "fn": hash,
                "blocks": blocks,
                "time": time.time(),
                "uid": self.fs.ukey(path),
            }
            self._metadata.update_file(path, detail)
            logger.debug("Creating local sparse file for %s", path)

        # call target filesystems open
        self._mkcache()
        f = self.fs._open(
            path,
            mode=mode,
            block_size=block_size,
            autocommit=autocommit,
            cache_options=cache_options,
            cache_type="none",
            **kwargs,
        )
        if self.compression:
            comp = (
                infer_compression(path)
                if self.compression == "infer"
                else self.compression
            )
            f = compr[comp](f, mode="rb")
        if "blocksize" in detail:
            if detail["blocksize"] != f.blocksize:
                raise BlocksizeMismatchError(
                    f"Cached file must be reopened with same block"
                    f" size as original (old: {detail['blocksize']},"
                    f" new {f.blocksize})"
                )
        else:
            detail["blocksize"] = f.blocksize
        # share `blocks` between the mmap cache and our metadata entry
        f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
        close = f.close
        # persist block state when the file is closed
        f.close = lambda: self.close_and_update(f, close)
        self.save_cache()
        return f

    def _parent(self, path):
        """Delegate parent-path computation to the target filesystem."""
        return self.fs._parent(path)

    def hash_name(self, path: str, *args: Any) -> str:
        # Kept for backward compatibility with downstream libraries.
        # Ignores extra arguments, previously same_name boolean.
        return self._mapper(path)

    def close_and_update(self, f, close):
        """Called when a file is closing, so store the set of blocks"""
        if f.closed:
            return
        path = self._strip_protocol(f.path)
        self._metadata.on_close_cached_file(f, path)
        try:
            logger.debug("going to save")
            self.save_cache()
            logger.debug("saved")
        except OSError:
            logger.debug("Cache saving failed while closing file")
        except NameError:
            logger.debug("Cache save failed due to interpreter shutdown")
        close()
        f.closed = True

    def ls(self, path, detail=True):
        """List path contents via the target filesystem (not cached)."""
        return self.fs.ls(path, detail)

    def __getattribute__(self, item):
        # Attribute access is intercepted so that methods defined on this
        # class are used, while everything else is proxied to the wrapped
        # target filesystem instance.
        if item in {
            "load_cache",
            "_open",
            "save_cache",
            "close_and_update",
            "__init__",
            "__getattribute__",
            "__reduce__",
            "_make_local_details",
            "open",
            "cat",
            "cat_file",
            "cat_ranges",
            "get",
            "read_block",
            "tail",
            "head",
            "info",
            "ls",
            "exists",
            "isfile",
            "isdir",
            "_check_file",
            "_check_cache",
            "_mkcache",
            "clear_cache",
            "clear_expired_cache",
            "pop_from_cache",
            "local_file",
            "_paths_from_path",
            "get_mapper",
            "open_many",
            "commit_many",
            "hash_name",
            "__hash__",
            "__eq__",
            "to_json",
            "to_dict",
            "cache_size",
            "pipe_file",
            "pipe",
            "start_transaction",
            "end_transaction",
        }:
            # all the methods defined in this class. Note `open` here, since
            # it calls `_open`, but is actually in superclass
            return lambda *args, **kw: getattr(type(self), item).__get__(self)(
                *args, **kw
            )
        if item in ["__reduce_ex__"]:
            raise AttributeError
        if item in ["transaction"]:
            # property
            return type(self).transaction.__get__(self)
        if item in ["_cache", "transaction_type"]:
            # class attributes
            return getattr(type(self), item)
        if item == "__class__":
            return type(self)
        d = object.__getattribute__(self, "__dict__")
        fs = d.get("fs", None)  # fs is not immediately defined
        if item in d:
            return d[item]
        elif fs is not None:
            if item in fs.__dict__:
                # attribute of instance
                return fs.__dict__[item]
            # attributed belonging to the target filesystem
            cls = type(fs)
            m = getattr(cls, item)
            if (inspect.isfunction(m) or inspect.isdatadescriptor(m)) and (
                not hasattr(m, "__self__") or m.__self__ is None
            ):
                # instance method
                return m.__get__(fs, cls)
            return m  # class method or attribute
        else:
            # attributes of the superclass, while target is being set up
            return super().__getattribute__(item)

    def __eq__(self, other):
        """Test for equality."""
        if self is other:
            return True
        if not isinstance(other, type(self)):
            return False
        return (
            self.storage == other.storage
            and self.kwargs == other.kwargs
            and self.cache_check == other.cache_check
            and self.check_files == other.check_files
            and self.expiry == other.expiry
            and self.compression == other.compression
            and self._mapper == other._mapper
            and self.target_protocol == other.target_protocol
        )

    def __hash__(self):
        """Calculate hash."""
        return (
            hash(tuple(self.storage))
            ^ hash(str(self.kwargs))
            ^ hash(self.cache_check)
            ^ hash(self.check_files)
            ^ hash(self.expiry)
            ^ hash(self.compression)
            ^ hash(self._mapper)
            ^ hash(self.target_protocol)
        )
|
| 509 |
+
|
| 510 |
+
|
| 511 |
+
class WholeFileCacheFileSystem(CachingFileSystem):
    """Caches whole remote files on first access

    This class is intended as a layer over any other file system, and
    will make a local copy of each file accessed, so that all subsequent
    reads are local. This is similar to ``CachingFileSystem``, but without
    the block-wise functionality and so can work even when sparse files
    are not allowed. See its docstring for definition of the init
    arguments.

    The class still needs access to the remote store for listing files,
    and may refresh cached files.
    """

    protocol = "filecache"
    local_file = True

    def open_many(self, open_files, **kwargs):
        """Open a batch of files, downloading any not yet cached in one call."""
        paths = [of.path for of in open_files]
        if "r" in open_files.mode:
            self._mkcache()
        else:
            # writes: buffer each file locally; upload happens on commit
            return [
                LocalTempFile(
                    self.fs,
                    path,
                    mode=open_files.mode,
                    fn=os.path.join(self.storage[-1], self._mapper(path)),
                    **kwargs,
                )
                for path in paths
            ]

        if self.compression:
            raise NotImplementedError
        details = [self._check_file(sp) for sp in paths]
        downpath = [p for p, d in zip(paths, details) if not d]
        downfn0 = [
            os.path.join(self.storage[-1], self._mapper(p))
            for p, d in zip(paths, details)
        ]  # keep these path names for opening later
        downfn = [fn for fn, d in zip(downfn0, details) if not d]
        if downpath:
            # skip if all files are already cached and up to date
            self.fs.get(downpath, downfn)

            # update metadata - only happens when downloads are successful
            newdetail = [
                {
                    "original": path,
                    "fn": self._mapper(path),
                    "blocks": True,
                    "time": time.time(),
                    "uid": self.fs.ukey(path),
                }
                for path in downpath
            ]
            for path, detail in zip(downpath, newdetail):
                self._metadata.update_file(path, detail)
            self.save_cache()

        def firstpart(fn):
            # helper to adapt both whole-file and simple-cache
            return fn[1] if isinstance(fn, tuple) else fn

        return [
            open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode)
            for fn0, fn1 in zip(details, downfn0)
        ]

    def commit_many(self, open_files):
        """Upload locally written files to their remote paths and clean up."""
        self.fs.put([f.fn for f in open_files], [f.path for f in open_files])
        [f.close() for f in open_files]
        for f in open_files:
            # in case autocommit is off, and so close did not already delete
            try:
                os.remove(f.name)
            except FileNotFoundError:
                pass
        self._cache_size = None

    def _make_local_details(self, path):
        """Register *path* in the cache metadata and return its local filename."""
        hash = self._mapper(path)
        fn = os.path.join(self.storage[-1], hash)
        detail = {
            "original": path,
            "fn": hash,
            "blocks": True,  # whole file is always fully cached
            "time": time.time(),
            "uid": self.fs.ukey(path),
        }
        self._metadata.update_file(path, detail)
        logger.debug("Copying %s to local cache", path)
        return fn

    def cat(
        self,
        path,
        recursive=False,
        on_error="raise",
        callback=DEFAULT_CALLBACK,
        **kwargs,
    ):
        """Fetch contents of (possibly many) paths, using the local cache."""
        paths = self.expand_path(
            path, recursive=recursive, maxdepth=kwargs.get("maxdepth")
        )
        getpaths = []
        storepaths = []
        fns = []
        out = {}
        for p in paths.copy():
            try:
                detail = self._check_file(p)
                if not detail:
                    # not cached yet: schedule for bulk download below
                    fn = self._make_local_details(p)
                    getpaths.append(p)
                    storepaths.append(fn)
                else:
                    detail, fn = detail if isinstance(detail, tuple) else (None, detail)
                fns.append(fn)
            except Exception as e:
                if on_error == "raise":
                    raise
                if on_error == "return":
                    out[p] = e
                paths.remove(p)

        if getpaths:
            self.fs.get(getpaths, storepaths)
            self.save_cache()

        callback.set_size(len(paths))
        for p, fn in zip(paths, fns):
            with open(fn, "rb") as f:
                out[p] = f.read()
            callback.relative_update(1)
        if isinstance(path, str) and len(paths) == 1 and recursive is False:
            # single-path call: return bytes rather than a dict
            out = out[paths[0]]
        return out

    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        if "r" not in mode:
            hash = self._mapper(path)
            fn = os.path.join(self.storage[-1], hash)
            user_specified_kwargs = {
                k: v
                for k, v in kwargs.items()
                # those kwargs were added by open(), we don't want them
                if k not in ["autocommit", "block_size", "cache_options"]
            }
            return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)
        detail = self._check_file(path)
        if detail:
            detail, fn = detail
            _, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                logger.debug("Opening local copy of %s", path)

                # In order to support downstream filesystems to be able to
                # infer the compression from the original filename, like
                # the `TarFileSystem`, let's extend the `io.BufferedReader`
                # fileobject protocol by adding a dedicated attribute
                # `original`.
                f = open(fn, mode)
                f.original = detail.get("original")
                return f
            else:
                raise ValueError(
                    f"Attempt to open partially cached file {path}"
                    f" as a wholly cached file"
                )
        else:
            fn = self._make_local_details(path)
        kwargs["mode"] = mode

        # call target filesystems open
        self._mkcache()
        if self.compression:
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (
                    infer_compression(path)
                    if self.compression == "infer"
                    else self.compression
                )
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2**20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get_file(path, fn)
        self.save_cache()
        # reopen via the cached-file branch above
        return self._open(path, mode)
|
| 709 |
+
|
| 710 |
+
|
| 711 |
+
class SimpleCacheFileSystem(WholeFileCacheFileSystem):
    """Caches whole remote files on first access

    This class is intended as a layer over any other file system, and
    will make a local copy of each file accessed, so that all subsequent
    reads are local. This implementation only copies whole files, and
    does not keep any metadata about the download time or file details.
    It is therefore safer to use in multi-threaded/concurrent situations.

    This is the only of the caching filesystems that supports write: you will
    be given a real local open file, and upon close and commit, it will be
    uploaded to the target filesystem; the writability or the target URL is
    not checked until that time.

    """

    protocol = "simplecache"
    local_file = True
    transaction_type = WriteCachedTransaction

    def __init__(self, **kwargs):
        kw = kwargs.copy()
        # this cache keeps no metadata, so expiry/consistency checks
        # cannot apply and are forced off
        for key in ["cache_check", "expiry_time", "check_files"]:
            kw[key] = False
        super().__init__(**kw)
        for storage in self.storage:
            if not os.path.exists(storage):
                os.makedirs(storage, exist_ok=True)

    def _check_file(self, path):
        """Return the local cached filename for ``path``, or None if uncached.

        With no stored metadata, presence of the hashed filename on disk in
        any of the storage locations is the only test performed.
        """
        self._check_cache()
        sha = self._mapper(path)
        for storage in self.storage:
            fn = os.path.join(storage, sha)
            if os.path.exists(fn):
                return fn

    def save_cache(self):
        # no metadata is kept, so there is nothing to persist
        pass

    def load_cache(self):
        # no metadata is kept, so there is nothing to load
        pass

    def pipe_file(self, path, value=None, **kwargs):
        """Write ``value`` to ``path``; deferred via a local file inside a transaction."""
        if self._intrans:
            # within a transaction, stage the bytes in a LocalTempFile so
            # upload happens on commit
            with self.open(path, "wb") as f:
                f.write(value)
        else:
            super().pipe_file(path, value)

    def ls(self, path, detail=True, **kwargs):
        """List ``path``, merging in files pending upload in the current transaction."""
        path = self._strip_protocol(path)
        details = []
        try:
            details = self.fs.ls(
                path, detail=True, **kwargs
            ).copy()  # don't edit original!
        except FileNotFoundError as e:
            # remember the error: the path may still "exist" via in-transaction files
            ex = e
        else:
            ex = None
        if self._intrans:
            path1 = path.rstrip("/") + "/"
            for f in self.transaction.files:
                if f.path == path:
                    details.append(
                        {"name": path, "size": f.size or f.tell(), "type": "file"}
                    )
                elif f.path.startswith(path1):
                    if f.path.count("/") == path1.count("/"):
                        # direct child of the listed directory
                        details.append(
                            {"name": f.path, "size": f.size or f.tell(), "type": "file"}
                        )
                    else:
                        # deeper descendant: surface the intermediate directory
                        dname = "/".join(f.path.split("/")[: path1.count("/") + 1])
                        details.append({"name": dname, "size": 0, "type": "directory"})
        if ex is not None and not details:
            # nothing remote and nothing pending -> genuinely not found
            raise ex
        if detail:
            return details
        return sorted(_["name"] for _ in details)

    def info(self, path, **kwargs):
        """Info for ``path``, consulting in-transaction pending files first."""
        path = self._strip_protocol(path)
        if self._intrans:
            f = [_ for _ in self.transaction.files if _.path == path]
            if f:
                # closed staged files have their final size on disk;
                # open ones report the current write position
                size = os.path.getsize(f[0].fn) if f[0].closed else f[0].tell()
                return {"name": path, "size": size, "type": "file"}
            f = any(_.path.startswith(path + "/") for _ in self.transaction.files)
            if f:
                return {"name": path, "size": 0, "type": "directory"}
        return self.fs.info(path, **kwargs)

    def pipe(self, path, value=None, **kwargs):
        """Write bytes to one path (str) or many (dict of path->bytes)."""
        if isinstance(path, str):
            self.pipe_file(self._strip_protocol(path), value, **kwargs)
        elif isinstance(path, dict):
            for k, v in path.items():
                self.pipe_file(self._strip_protocol(k), v, **kwargs)
        else:
            raise ValueError("path must be str or dict")

    def cat_ranges(
        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
    ):
        """Fetch byte ranges, batch-downloading any not-yet-cached files first.

        Files absent from the local cache are fetched in one ``fs.get`` call
        into the writable storage location, so that the parent implementation
        reads every range from local copies.
        """
        lpaths = [self._check_file(p) for p in paths]
        # NOTE: _check_file returns None (not False) for uncached files
        rpaths = [p for l, p in zip(lpaths, paths) if l is None]
        lpaths = [
            os.path.join(self.storage[-1], self._mapper(p))
            for l, p in zip(lpaths, paths)
            if l is None
        ]
        if rpaths:
            self.fs.get(rpaths, lpaths)
        return super().cat_ranges(
            paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
        )

    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        sha = self._mapper(path)

        if "r" not in mode:
            # writing: hand back a local temp file, uploaded on commit
            fn = os.path.join(self.storage[-1], sha)
            user_specified_kwargs = {
                k: v
                for k, v in kwargs.items()
                if k not in ["autocommit", "block_size", "cache_options"]
            }  # those were added by open()
            return LocalTempFile(
                self,
                path,
                mode=mode,
                autocommit=not self._intrans,
                fn=fn,
                **user_specified_kwargs,
            )
        fn = self._check_file(path)
        if fn:
            # already cached: serve the plain local file
            return open(fn, mode)

        fn = os.path.join(self.storage[-1], sha)
        logger.debug("Copying %s to local cache", path)
        kwargs["mode"] = mode

        self._mkcache()
        self._cache_size = None  # invalidate cached size accounting
        if self.compression:
            # stream-decompress the remote file into the cache copy
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (
                    infer_compression(path)
                    if self.compression == "infer"
                    else self.compression
                )
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2**20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get_file(path, fn)
        # recurse: the file is now cached, so this returns the local handle
        return self._open(path, mode)
|
| 873 |
+
|
| 874 |
+
|
| 875 |
+
class LocalTempFile:
    """A temporary local file, which will be uploaded on commit

    Wraps a real local file handle; unknown attributes are delegated to it.
    On ``close`` (with ``autocommit=True``) or explicit ``commit``, the local
    copy is uploaded to ``path`` on the target filesystem ``fs``.
    """

    def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
        # fn: local staging filename; path: eventual remote target
        self.fn = fn
        self.fh = open(fn, mode)
        self.mode = mode
        if seek:
            # resume writing at a saved position (used when unpickling)
            self.fh.seek(seek)
        self.path = path
        self.size = None
        self.fs = fs
        self.closed = False
        self.autocommit = autocommit
        self.kwargs = kwargs

    def __reduce__(self):
        # always open in r+b to allow continuing writing at a location
        return (
            LocalTempFile,
            (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
        )

    def __enter__(self):
        return self.fh

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the local handle; upload to the target if autocommitting."""
        # self.size = self.fh.tell()
        if self.closed:
            return
        self.fh.close()
        self.closed = True
        if self.autocommit:
            self.commit()

    def discard(self):
        """Close and delete the local copy without uploading anything."""
        self.fh.close()
        # mark closed so a later close() cannot try to commit (upload)
        # a file that has just been removed from disk
        self.closed = True
        os.remove(self.fn)

    def commit(self):
        """Upload the staged local file to the target filesystem."""
        self.fs.put(self.fn, self.path, **self.kwargs)
        # we do not delete local copy - it's still in the cache

    @property
    def name(self):
        # expose the local filename, mirroring file-object .name
        return self.fn

    def __repr__(self) -> str:
        return f"LocalTempFile: {self.path}"

    def __getattr__(self, item):
        # delegate everything else (write, read, tell, ...) to the real handle
        return getattr(self.fh, item)
|