koichi12 commited on
Commit
a10357c
·
verified ·
1 Parent(s): 3914b7f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50) hide show
  1. .venv/lib/python3.11/site-packages/fsspec/__pycache__/__init__.cpython-311.pyc +0 -0
  2. .venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc +0 -0
  3. .venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/fsspec/__pycache__/caching.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc +0 -0
  10. .venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc +0 -0
  11. .venv/lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc +0 -0
  12. .venv/lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc +0 -0
  13. .venv/lib/python3.11/site-packages/fsspec/__pycache__/fuse.cpython-311.pyc +0 -0
  14. .venv/lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc +0 -0
  15. .venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/fsspec/__pycache__/json.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/fsspec/__pycache__/mapping.cpython-311.pyc +0 -0
  18. .venv/lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc +0 -0
  19. .venv/lib/python3.11/site-packages/fsspec/__pycache__/registry.cpython-311.pyc +0 -0
  20. .venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc +0 -0
  21. .venv/lib/python3.11/site-packages/fsspec/__pycache__/utils.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/fsspec/implementations/__init__.py +0 -0
  23. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/__init__.cpython-311.pyc +0 -0
  24. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/arrow.cpython-311.pyc +0 -0
  25. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/asyn_wrapper.cpython-311.pyc +0 -0
  26. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc +0 -0
  27. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc +0 -0
  28. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cached.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dask.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/data.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc +0 -0
  32. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dirfs.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc +0 -0
  35. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/github.cpython-311.pyc +0 -0
  36. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/http.cpython-311.pyc +0 -0
  37. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc +0 -0
  38. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc +0 -0
  39. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/local.cpython-311.pyc +0 -0
  40. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/memory.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/reference.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/smb.cpython-311.pyc +0 -0
  44. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc +0 -0
  45. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc +0 -0
  46. .venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/zip.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/fsspec/implementations/asyn_wrapper.py +99 -0
  48. .venv/lib/python3.11/site-packages/fsspec/implementations/cache_mapper.py +75 -0
  49. .venv/lib/python3.11/site-packages/fsspec/implementations/cache_metadata.py +232 -0
  50. .venv/lib/python3.11/site-packages/fsspec/implementations/cached.py +929 -0
.venv/lib/python3.11/site-packages/fsspec/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (2.19 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc ADDED
Binary file (640 Bytes). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc ADDED
Binary file (4.71 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc ADDED
Binary file (52.7 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/caching.cpython-311.pyc ADDED
Binary file (41.7 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc ADDED
Binary file (13.8 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc ADDED
Binary file (8.05 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc ADDED
Binary file (6.67 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc ADDED
Binary file (3.4 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc ADDED
Binary file (32.4 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc ADDED
Binary file (4.72 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc ADDED
Binary file (955 Bytes). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/fuse.cpython-311.pyc ADDED
Binary file (17.2 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc ADDED
Binary file (22.2 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc ADDED
Binary file (23.3 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/json.cpython-311.pyc ADDED
Binary file (8.16 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/mapping.cpython-311.pyc ADDED
Binary file (14 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc ADDED
Binary file (17.7 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/registry.cpython-311.pyc ADDED
Binary file (11.8 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc ADDED
Binary file (5.11 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/__pycache__/utils.cpython-311.pyc ADDED
Binary file (32.2 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (195 Bytes). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/arrow.cpython-311.pyc ADDED
Binary file (15.6 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/asyn_wrapper.cpython-311.pyc ADDED
Binary file (5.1 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc ADDED
Binary file (4.68 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc ADDED
Binary file (12.8 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/cached.cpython-311.pyc ADDED
Binary file (50 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dask.cpython-311.pyc ADDED
Binary file (7.76 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/data.cpython-311.pyc ADDED
Binary file (3.5 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dbfs.cpython-311.pyc ADDED
Binary file (20.1 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/dirfs.cpython-311.pyc ADDED
Binary file (27 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/ftp.cpython-311.pyc ADDED
Binary file (20.1 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/git.cpython-311.pyc ADDED
Binary file (6.65 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/github.cpython-311.pyc ADDED
Binary file (12.9 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/http.cpython-311.pyc ADDED
Binary file (45.2 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/jupyter.cpython-311.pyc ADDED
Binary file (7.29 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/libarchive.cpython-311.pyc ADDED
Binary file (10.2 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/local.cpython-311.pyc ADDED
Binary file (26.8 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/memory.cpython-311.pyc ADDED
Binary file (16.5 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/reference.cpython-311.pyc ADDED
Binary file (73.8 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/sftp.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/smb.cpython-311.pyc ADDED
Binary file (20.3 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/tar.cpython-311.pyc ADDED
Binary file (5.37 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc ADDED
Binary file (24.8 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/__pycache__/zip.cpython-311.pyc ADDED
Binary file (9.59 kB). View file
 
.venv/lib/python3.11/site-packages/fsspec/implementations/asyn_wrapper.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import functools
3
+ import inspect
4
+
5
+ from fsspec.asyn import AsyncFileSystem
6
+
7
+
8
def async_wrapper(func, obj=None):
    """Return an awaitable proxy for the synchronous callable *func*.

    The call is delegated to a worker thread via ``asyncio.to_thread`` so
    that the event loop is never blocked by the synchronous work.

    Parameters
    ----------
    func : callable
        The synchronous function to wrap.
    obj : object, optional
        Accepted for API compatibility; not used by the wrapper itself.

    Returns
    -------
    callable
        A coroutine function forwarding all arguments to ``func``.
    """

    @functools.wraps(func)
    async def _run_in_thread(*args, **kwargs):
        # to_thread releases the event loop while func executes.
        return await asyncio.to_thread(func, *args, **kwargs)

    return _run_in_thread
30
+
31
+
32
class AsyncFileSystemWrapper(AsyncFileSystem):
    """
    A wrapper class to convert a synchronous filesystem into an asynchronous one.

    This class takes an existing synchronous filesystem implementation and wraps all
    its methods to provide an asynchronous interface.

    Parameters
    ----------
    sync_fs : AbstractFileSystem
        The synchronous filesystem instance to wrap.
    """

    def __init__(self, sync_fs, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.asynchronous = True
        self.sync_fs = sync_fs
        # Mirror the wrapped filesystem's protocol so URL dispatch still works.
        self.protocol = self.sync_fs.protocol
        self._wrap_all_sync_methods()

    @property
    def fsid(self):
        # Prefixed so this wrapper is not conflated with the wrapped
        # filesystem in any lookup keyed by fsid.
        return f"async_{self.sync_fs.fsid}"

    def _wrap_all_sync_methods(self):
        """
        Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
        """
        excluded_methods = {"open"}
        for method_name in dir(self.sync_fs):
            if method_name.startswith("_") or method_name in excluded_methods:
                continue

            # getattr_static avoids triggering property getters during inspection.
            attr = inspect.getattr_static(self.sync_fs, method_name)
            if isinstance(attr, property):
                continue

            method = getattr(self.sync_fs, method_name)
            if callable(method) and not asyncio.iscoroutinefunction(method):
                async_method = async_wrapper(method, obj=self)
                # Install under the "_"-prefixed name: AsyncFileSystem routes
                # its public sync/async API through these implementations.
                setattr(self, f"_{method_name}", async_method)

    @classmethod
    def wrap_class(cls, sync_fs_class):
        """
        Create a new class that can be used to instantiate an AsyncFileSystemWrapper
        with lazy instantiation of the underlying synchronous filesystem.

        Parameters
        ----------
        sync_fs_class : type
            The class of the synchronous filesystem to wrap.

        Returns
        -------
        type
            A new class that wraps the provided synchronous filesystem class.
        """

        class GeneratedAsyncFileSystemWrapper(cls):
            def __init__(self, *args, **kwargs):
                # Build the sync filesystem only when the wrapper is created.
                sync_fs = sync_fs_class(*args, **kwargs)
                super().__init__(sync_fs)

        # Give the generated class a descriptive name for reprs/debugging.
        GeneratedAsyncFileSystemWrapper.__name__ = (
            f"Async{sync_fs_class.__name__}Wrapper"
        )
        return GeneratedAsyncFileSystemWrapper
.venv/lib/python3.11/site-packages/fsspec/implementations/cache_mapper.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import hashlib
5
+
6
+ from fsspec.implementations.local import make_path_posix
7
+
8
+
9
class AbstractCacheMapper(abc.ABC):
    """Abstract super-class for mappers from remote URLs to local cached
    basenames.
    """

    @abc.abstractmethod
    def __call__(self, path: str) -> str: ...

    def __eq__(self, other: object) -> bool:
        # Identity is determined solely by the concrete class; stateful
        # subclasses must extend this to compare their attributes too.
        return isinstance(other, self.__class__)

    def __hash__(self) -> int:
        # Must stay consistent with __eq__, so hash on the class alone.
        return hash(self.__class__)
26
+
27
+
28
class BasenameCacheMapper(AbstractCacheMapper):
    """Cache mapper that keeps the basename of the remote URL plus a fixed
    number of parent directory levels.

    With the default of zero levels, different paths that share a basename
    map to the same cached basename.
    """

    def __init__(self, directory_levels: int = 0):
        if directory_levels < 0:
            raise ValueError(
                "BasenameCacheMapper requires zero or positive directory_levels"
            )
        self.directory_levels = directory_levels

        # Marker used to join retained directory components into one name.
        self._separator = "_@_"

    def __call__(self, path: str) -> str:
        normalized = make_path_posix(path)
        leading, *kept = normalized.rsplit("/", self.directory_levels + 1)
        # If no "/" was found, the whole path is already a bare filename.
        return self._separator.join(kept) if kept else leading

    def __eq__(self, other: object) -> bool:
        return super().__eq__(other) and self.directory_levels == other.directory_levels

    def __hash__(self) -> int:
        return super().__hash__() ^ hash(self.directory_levels)
59
+
60
+
61
class HashCacheMapper(AbstractCacheMapper):
    """Cache mapper that names cached files by the SHA-256 hex digest of the
    remote URL, avoiding basename collisions across backends."""

    def __call__(self, path: str) -> str:
        digest = hashlib.sha256(path.encode())
        return digest.hexdigest()
66
+
67
+
68
def create_cache_mapper(same_names: bool) -> AbstractCacheMapper:
    """Factory method to create cache mapper for backward compatibility with
    ``CachingFileSystem`` constructor using ``same_names`` kwarg.
    """
    # same_names=True preserves basenames; False hashes the full URL.
    return BasenameCacheMapper() if same_names else HashCacheMapper()
.venv/lib/python3.11/site-packages/fsspec/implementations/cache_metadata.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import pickle
5
+ import time
6
+ from typing import TYPE_CHECKING
7
+
8
+ from fsspec.utils import atomic_write
9
+
10
+ try:
11
+ import ujson as json
12
+ except ImportError:
13
+ if not TYPE_CHECKING:
14
+ import json
15
+
16
+ if TYPE_CHECKING:
17
+ from typing import Any, Dict, Iterator, Literal
18
+
19
+ from typing_extensions import TypeAlias
20
+
21
+ from .cached import CachingFileSystem
22
+
23
+ Detail: TypeAlias = Dict[str, Any]
24
+
25
+
26
class CacheMetadata:
    """Cache metadata.

    All reading and writing of cache metadata is performed by this class,
    accessing the cached files and blocks is not.

    Metadata is stored in a single file per storage directory in JSON format.
    For backward compatibility, also reads metadata stored in pickle format
    which is converted to JSON when next saved.
    """

    def __init__(self, storage: list[str]):
        """
        Parameters
        ----------
        storage: list[str]
            Directories containing cached files, must be at least one. Metadata
            is stored in the last of these directories by convention.
        """
        if not storage:
            raise ValueError("CacheMetadata expects at least one storage location")

        self._storage = storage
        # One metadata dict per storage location, in the same order as storage.
        self.cached_files: list[Detail] = [{}]

        # Private attribute to force saving of metadata in pickle format rather than
        # JSON for use in tests to confirm can read both pickle and JSON formats.
        self._force_save_pickle = False

    def _load(self, fn: str) -> Detail:
        """Low-level function to load metadata from specific file"""
        try:
            with open(fn, "r") as f:
                loaded = json.load(f)
        except ValueError:
            # Not valid JSON: fall back to the legacy pickle format.
            with open(fn, "rb") as f:
                loaded = pickle.load(f)
        for c in loaded.values():
            # JSON serializes block indices as lists; restore them to sets.
            if isinstance(c.get("blocks"), list):
                c["blocks"] = set(c["blocks"])
        return loaded

    def _save(self, metadata_to_save: Detail, fn: str) -> None:
        """Low-level function to save metadata to specific file"""
        if self._force_save_pickle:
            with atomic_write(fn) as f:
                pickle.dump(metadata_to_save, f)
        else:
            with atomic_write(fn, mode="w") as f:
                json.dump(metadata_to_save, f)

    def _scan_locations(
        self, writable_only: bool = False
    ) -> Iterator[tuple[str, str, bool]]:
        """Yield locations (filenames) where metadata is stored, and whether
        writable or not.

        Parameters
        ----------
        writable_only: bool
            Set to True to only yield writable locations.

        Returns
        -------
        Yields (metadata filename, storage directory, writable flag)
        """
        n = len(self._storage)
        for i, storage in enumerate(self._storage):
            # By convention only the last storage location is writable.
            writable = i == n - 1
            if writable_only and not writable:
                continue
            yield os.path.join(storage, "cache"), storage, writable

    def check_file(
        self, path: str, cfs: CachingFileSystem | None
    ) -> Literal[False] | tuple[Detail, str]:
        """If path is in cache return its details, otherwise return ``False``.

        If the optional CachingFileSystem is specified then it is used to
        perform extra checks to reject possible matches, such as if they are
        too old.
        """
        for (fn, base, _), cache in zip(self._scan_locations(), self.cached_files):
            if path not in cache:
                continue
            detail = cache[path].copy()

            if cfs is not None:
                if cfs.check_files and detail["uid"] != cfs.fs.ukey(path):
                    # Wrong file as determined by hash of file properties
                    continue
                if cfs.expiry and time.time() - detail["time"] > cfs.expiry:
                    # Cached file has expired
                    continue

            fn = os.path.join(base, detail["fn"])
            if os.path.exists(fn):
                return detail, fn
        return False

    def clear_expired(self, expiry_time: int) -> tuple[list[str], bool]:
        """Remove expired metadata from the cache.

        Returns names of files corresponding to expired metadata and a boolean
        flag indicating whether the writable cache is empty. Caller is
        responsible for deleting the expired files.
        """
        expired_files = []
        # Only the writable (last) cache location can be pruned.
        for path, detail in self.cached_files[-1].copy().items():
            if time.time() - detail["time"] > expiry_time:
                fn = detail.get("fn", "")
                if not fn:
                    raise RuntimeError(
                        f"Cache metadata does not contain 'fn' for {path}"
                    )
                fn = os.path.join(self._storage[-1], fn)
                expired_files.append(fn)
                self.cached_files[-1].pop(path)

        if self.cached_files[-1]:
            cache_path = os.path.join(self._storage[-1], "cache")
            self._save(self.cached_files[-1], cache_path)

        writable_cache_empty = not self.cached_files[-1]
        return expired_files, writable_cache_empty

    def load(self) -> None:
        """Load all metadata from disk and store in ``self.cached_files``"""
        cached_files = []
        for fn, _, _ in self._scan_locations():
            if os.path.exists(fn):
                # TODO: consolidate blocks here
                cached_files.append(self._load(fn))
            else:
                cached_files.append({})
        self.cached_files = cached_files or [{}]

    def on_close_cached_file(self, f: Any, path: str) -> None:
        """Perform side-effect actions on closing a cached file.

        The actual closing of the file is the responsibility of the caller.
        """
        # File must be writable, so in self.cached_files[-1]
        c = self.cached_files[-1][path]
        # Once every block has been fetched, collapse the block-set to True
        # ("whole file cached").
        if c["blocks"] is not True and len(c["blocks"]) * f.blocksize >= f.size:
            c["blocks"] = True

    def pop_file(self, path: str) -> str | None:
        """Remove metadata of cached file.

        If path is in the cache, return the filename of the cached file,
        otherwise return ``None``. Caller is responsible for deleting the
        cached file.
        """
        details = self.check_file(path, None)
        if not details:
            return None
        _, fn = details
        if fn.startswith(self._storage[-1]):
            self.cached_files[-1].pop(path)
            self.save()
        else:
            raise PermissionError(
                "Can only delete cached file in last, writable cache location"
            )
        return fn

    def save(self) -> None:
        """Save metadata to disk"""
        for (fn, _, writable), cache in zip(self._scan_locations(), self.cached_files):
            if not writable:
                continue

            if os.path.exists(fn):
                # Merge with metadata already on disk, which may have been
                # written since we last loaded it.
                cached_files = self._load(fn)
                for k, c in cached_files.items():
                    if k in cache:
                        if c["blocks"] is True or cache[k]["blocks"] is True:
                            c["blocks"] = True
                        else:
                            # self.cached_files[*][*]["blocks"] must continue to
                            # point to the same set object so that updates
                            # performed by MMapCache are propagated back to
                            # self.cached_files.
                            blocks = cache[k]["blocks"]
                            blocks.update(c["blocks"])
                            c["blocks"] = blocks
                        c["time"] = max(c["time"], cache[k]["time"])
                        c["uid"] = cache[k]["uid"]

                # Files can be added to cache after it was written once
                for k, c in cache.items():
                    if k not in cached_files:
                        cached_files[k] = c
            else:
                cached_files = cache
            # Serialize copies so converting sets to lists (for JSON) does not
            # mutate the in-memory metadata.
            cache = {k: v.copy() for k, v in cached_files.items()}
            for c in cache.values():
                if isinstance(c["blocks"], set):
                    c["blocks"] = list(c["blocks"])
            self._save(cache, fn)
            self.cached_files[-1] = cached_files

    def update_file(self, path: str, detail: Detail) -> None:
        """Update metadata for specific file in memory, do not save"""
        self.cached_files[-1][path] = detail
.venv/lib/python3.11/site-packages/fsspec/implementations/cached.py ADDED
@@ -0,0 +1,929 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ import tempfile
7
+ import time
8
+ import weakref
9
+ from shutil import rmtree
10
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar
11
+
12
+ from fsspec import AbstractFileSystem, filesystem
13
+ from fsspec.callbacks import DEFAULT_CALLBACK
14
+ from fsspec.compression import compr
15
+ from fsspec.core import BaseCache, MMapCache
16
+ from fsspec.exceptions import BlocksizeMismatchError
17
+ from fsspec.implementations.cache_mapper import create_cache_mapper
18
+ from fsspec.implementations.cache_metadata import CacheMetadata
19
+ from fsspec.spec import AbstractBufferedFile
20
+ from fsspec.transaction import Transaction
21
+ from fsspec.utils import infer_compression
22
+
23
+ if TYPE_CHECKING:
24
+ from fsspec.implementations.cache_mapper import AbstractCacheMapper
25
+
26
+ logger = logging.getLogger("fsspec.cached")
27
+
28
+
29
class WriteCachedTransaction(Transaction):
    # Transaction that defers uploading locally written files until the
    # transaction completes.

    def complete(self, commit=True):
        """Finish the transaction, uploading cached files when *commit* is True.

        The pending file list is cleared and the filesystem reference is
        dropped in all cases, including rollback (commit=False).
        """
        rpaths = [f.path for f in self.files]  # remote destinations
        lpaths = [f.fn for f in self.files]  # local cached copies
        if commit:
            self.fs.put(lpaths, rpaths)
        self.files.clear()
        self.fs._intrans = False
        self.fs._transaction = None
        self.fs = None  # break cycle
39
+
40
+
41
+ class CachingFileSystem(AbstractFileSystem):
42
+ """Locally caching filesystem, layer over any other FS
43
+
44
+ This class implements chunk-wise local storage of remote files, for quick
45
+ access after the initial download. The files are stored in a given
46
+ directory with hashes of URLs for the filenames. If no directory is given,
47
+ a temporary one is used, which should be cleaned up by the OS after the
48
+ process ends. The files themselves are sparse (as implemented in
49
+ :class:`~fsspec.caching.MMapCache`), so only the data which is accessed
50
+ takes up space.
51
+
52
+ Restrictions:
53
+
54
+ - the block-size must be the same for each access of a given file, unless
55
+ all blocks of the file have already been read
56
+ - caching can only be applied to file-systems which produce files
57
+ derived from fsspec.spec.AbstractBufferedFile ; LocalFileSystem is also
58
+ allowed, for testing
59
+ """
60
+
61
+ protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached")
62
+
63
def __init__(
    self,
    target_protocol=None,
    cache_storage="TMP",
    cache_check=10,
    check_files=False,
    expiry_time=604800,
    target_options=None,
    fs=None,
    same_names: bool | None = None,
    compression=None,
    cache_mapper: AbstractCacheMapper | None = None,
    **kwargs,
):
    """
    Parameters
    ----------
    target_protocol: str (optional)
        Target filesystem protocol. Provide either this or ``fs``.
    cache_storage: str or list(str)
        Location to store files. If "TMP", this is a temporary directory,
        and will be cleaned up by the OS when this process ends (or later).
        If a list, each location will be tried in the order given, but
        only the last will be considered writable.
    cache_check: int
        Number of seconds between reload of cache metadata
    check_files: bool
        Whether to explicitly see if the UID of the remote file matches
        the stored one before using. Warning: some file systems such as
        HTTP cannot reliably give a unique hash of the contents of some
        path, so be sure to set this option to False.
    expiry_time: int
        The time in seconds after which a local copy is considered useless.
        Set to falsy to prevent expiry. The default is equivalent to one
        week.
    target_options: dict or None
        Passed to the instantiation of the FS, if fs is None.
    fs: filesystem instance
        The target filesystem to run against. Provide this or ``protocol``.
    same_names: bool (optional)
        By default, target URLs are hashed using a ``HashCacheMapper`` so
        that files from different backends with the same basename do not
        conflict. If this argument is ``true``, a ``BasenameCacheMapper``
        is used instead. Other cache mapper options are available by using
        the ``cache_mapper`` keyword argument. Only one of this and
        ``cache_mapper`` should be specified.
    compression: str (optional)
        To decompress on download. Can be 'infer' (guess from the URL name),
        one of the entries in ``fsspec.compression.compr``, or None for no
        decompression.
    cache_mapper: AbstractCacheMapper (optional)
        The object use to map from original filenames to cached filenames.
        Only one of this and ``same_names`` should be specified.
    """
    super().__init__(**kwargs)
    # Exactly one of fs / target_protocol must be given.
    if fs is None and target_protocol is None:
        raise ValueError(
            "Please provide filesystem instance(fs) or target_protocol"
        )
    if not (fs is None) ^ (target_protocol is None):
        raise ValueError(
            "Both filesystems (fs) and target_protocol may not be both given."
        )
    if cache_storage == "TMP":
        tempdir = tempfile.mkdtemp()
        storage = [tempdir]
        # Best-effort cleanup of the temporary directory when this instance
        # is garbage collected.
        weakref.finalize(self, self._remove_tempdir, tempdir)
    else:
        if isinstance(cache_storage, str):
            storage = [cache_storage]
        else:
            storage = cache_storage
        # Only the last location is writable; ensure it exists.
        os.makedirs(storage[-1], exist_ok=True)
    self.storage = storage
    self.kwargs = target_options or {}
    self.cache_check = cache_check
    self.check_files = check_files
    self.expiry = expiry_time
    self.compression = compression

    # Size of cache in bytes. If None then the size is unknown and will be
    # recalculated the next time cache_size() is called. On writes to the
    # cache this is reset to None.
    self._cache_size = None

    if same_names is not None and cache_mapper is not None:
        raise ValueError(
            "Cannot specify both same_names and cache_mapper in "
            "CachingFileSystem.__init__"
        )
    if cache_mapper is not None:
        self._mapper = cache_mapper
    else:
        self._mapper = create_cache_mapper(
            same_names if same_names is not None else False
        )

    self.target_protocol = (
        target_protocol
        if isinstance(target_protocol, str)
        else (fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0])
    )
    self._metadata = CacheMetadata(self.storage)
    self.load_cache()
    self.fs = fs if fs is not None else filesystem(target_protocol, **self.kwargs)

    def _strip_protocol(path):
        # acts as a method, since each instance has a difference target
        return self.fs._strip_protocol(type(self)._strip_protocol(path))

    self._strip_protocol: Callable = _strip_protocol
175
+
176
+ @staticmethod
177
+ def _remove_tempdir(tempdir):
178
+ try:
179
+ rmtree(tempdir)
180
+ except Exception:
181
+ pass
182
+
183
+ def _mkcache(self):
184
+ os.makedirs(self.storage[-1], exist_ok=True)
185
+
186
+ def cache_size(self):
187
+ """Return size of cache in bytes.
188
+
189
+ If more than one cache directory is in use, only the size of the last
190
+ one (the writable cache directory) is returned.
191
+ """
192
+ if self._cache_size is None:
193
+ cache_dir = self.storage[-1]
194
+ self._cache_size = filesystem("file").du(cache_dir, withdirs=True)
195
+ return self._cache_size
196
+
197
+ def load_cache(self):
198
+ """Read set of stored blocks from file"""
199
+ self._metadata.load()
200
+ self._mkcache()
201
+ self.last_cache = time.time()
202
+
203
+ def save_cache(self):
204
+ """Save set of stored blocks from file"""
205
+ self._mkcache()
206
+ self._metadata.save()
207
+ self.last_cache = time.time()
208
+ self._cache_size = None
209
+
210
+ def _check_cache(self):
211
+ """Reload caches if time elapsed or any disappeared"""
212
+ self._mkcache()
213
+ if not self.cache_check:
214
+ # explicitly told not to bother checking
215
+ return
216
+ timecond = time.time() - self.last_cache > self.cache_check
217
+ existcond = all(os.path.exists(storage) for storage in self.storage)
218
+ if timecond or not existcond:
219
+ self.load_cache()
220
+
221
+ def _check_file(self, path):
222
+ """Is path in cache and still valid"""
223
+ path = self._strip_protocol(path)
224
+ self._check_cache()
225
+ return self._metadata.check_file(path, self)
226
+
227
+ def clear_cache(self):
228
+ """Remove all files and metadata from the cache
229
+
230
+ In the case of multiple cache locations, this clears only the last one,
231
+ which is assumed to be the read/write one.
232
+ """
233
+ rmtree(self.storage[-1])
234
+ self.load_cache()
235
+ self._cache_size = None
236
+
237
+ def clear_expired_cache(self, expiry_time=None):
238
+ """Remove all expired files and metadata from the cache
239
+
240
+ In the case of multiple cache locations, this clears only the last one,
241
+ which is assumed to be the read/write one.
242
+
243
+ Parameters
244
+ ----------
245
+ expiry_time: int
246
+ The time in seconds after which a local copy is considered useless.
247
+ If not defined the default is equivalent to the attribute from the
248
+ file caching instantiation.
249
+ """
250
+
251
+ if not expiry_time:
252
+ expiry_time = self.expiry
253
+
254
+ self._check_cache()
255
+
256
+ expired_files, writable_cache_empty = self._metadata.clear_expired(expiry_time)
257
+ for fn in expired_files:
258
+ if os.path.exists(fn):
259
+ os.remove(fn)
260
+
261
+ if writable_cache_empty:
262
+ rmtree(self.storage[-1])
263
+ self.load_cache()
264
+
265
+ self._cache_size = None
266
+
267
+ def pop_from_cache(self, path):
268
+ """Remove cached version of given file
269
+
270
+ Deletes local copy of the given (remote) path. If it is found in a cache
271
+ location which is not the last, it is assumed to be read-only, and
272
+ raises PermissionError
273
+ """
274
+ path = self._strip_protocol(path)
275
+ fn = self._metadata.pop_file(path)
276
+ if fn is not None:
277
+ os.remove(fn)
278
+ self._cache_size = None
279
+
280
+ def _open(
281
+ self,
282
+ path,
283
+ mode="rb",
284
+ block_size=None,
285
+ autocommit=True,
286
+ cache_options=None,
287
+ **kwargs,
288
+ ):
289
+ """Wrap the target _open
290
+
291
+ If the whole file exists in the cache, just open it locally and
292
+ return that.
293
+
294
+ Otherwise, open the file on the target FS, and make it have a mmap
295
+ cache pointing to the location which we determine, in our cache.
296
+ The ``blocks`` instance is shared, so as the mmap cache instance
297
+ updates, so does the entry in our ``cached_files`` attribute.
298
+ We monkey-patch this file, so that when it closes, we call
299
+ ``close_and_update`` to save the state of the blocks.
300
+ """
301
+ path = self._strip_protocol(path)
302
+
303
+ path = self.fs._strip_protocol(path)
304
+ if "r" not in mode:
305
+ return self.fs._open(
306
+ path,
307
+ mode=mode,
308
+ block_size=block_size,
309
+ autocommit=autocommit,
310
+ cache_options=cache_options,
311
+ **kwargs,
312
+ )
313
+ detail = self._check_file(path)
314
+ if detail:
315
+ # file is in cache
316
+ detail, fn = detail
317
+ hash, blocks = detail["fn"], detail["blocks"]
318
+ if blocks is True:
319
+ # stored file is complete
320
+ logger.debug("Opening local copy of %s", path)
321
+ return open(fn, mode)
322
+ # TODO: action where partial file exists in read-only cache
323
+ logger.debug("Opening partially cached copy of %s", path)
324
+ else:
325
+ hash = self._mapper(path)
326
+ fn = os.path.join(self.storage[-1], hash)
327
+ blocks = set()
328
+ detail = {
329
+ "original": path,
330
+ "fn": hash,
331
+ "blocks": blocks,
332
+ "time": time.time(),
333
+ "uid": self.fs.ukey(path),
334
+ }
335
+ self._metadata.update_file(path, detail)
336
+ logger.debug("Creating local sparse file for %s", path)
337
+
338
+ # call target filesystems open
339
+ self._mkcache()
340
+ f = self.fs._open(
341
+ path,
342
+ mode=mode,
343
+ block_size=block_size,
344
+ autocommit=autocommit,
345
+ cache_options=cache_options,
346
+ cache_type="none",
347
+ **kwargs,
348
+ )
349
+ if self.compression:
350
+ comp = (
351
+ infer_compression(path)
352
+ if self.compression == "infer"
353
+ else self.compression
354
+ )
355
+ f = compr[comp](f, mode="rb")
356
+ if "blocksize" in detail:
357
+ if detail["blocksize"] != f.blocksize:
358
+ raise BlocksizeMismatchError(
359
+ f"Cached file must be reopened with same block"
360
+ f" size as original (old: {detail['blocksize']},"
361
+ f" new {f.blocksize})"
362
+ )
363
+ else:
364
+ detail["blocksize"] = f.blocksize
365
+ f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
366
+ close = f.close
367
+ f.close = lambda: self.close_and_update(f, close)
368
+ self.save_cache()
369
+ return f
370
+
371
+ def _parent(self, path):
372
+ return self.fs._parent(path)
373
+
374
+ def hash_name(self, path: str, *args: Any) -> str:
375
+ # Kept for backward compatibility with downstream libraries.
376
+ # Ignores extra arguments, previously same_name boolean.
377
+ return self._mapper(path)
378
+
379
+ def close_and_update(self, f, close):
380
+ """Called when a file is closing, so store the set of blocks"""
381
+ if f.closed:
382
+ return
383
+ path = self._strip_protocol(f.path)
384
+ self._metadata.on_close_cached_file(f, path)
385
+ try:
386
+ logger.debug("going to save")
387
+ self.save_cache()
388
+ logger.debug("saved")
389
+ except OSError:
390
+ logger.debug("Cache saving failed while closing file")
391
+ except NameError:
392
+ logger.debug("Cache save failed due to interpreter shutdown")
393
+ close()
394
+ f.closed = True
395
+
396
+ def ls(self, path, detail=True):
397
+ return self.fs.ls(path, detail)
398
+
399
+ def __getattribute__(self, item):
400
+ if item in {
401
+ "load_cache",
402
+ "_open",
403
+ "save_cache",
404
+ "close_and_update",
405
+ "__init__",
406
+ "__getattribute__",
407
+ "__reduce__",
408
+ "_make_local_details",
409
+ "open",
410
+ "cat",
411
+ "cat_file",
412
+ "cat_ranges",
413
+ "get",
414
+ "read_block",
415
+ "tail",
416
+ "head",
417
+ "info",
418
+ "ls",
419
+ "exists",
420
+ "isfile",
421
+ "isdir",
422
+ "_check_file",
423
+ "_check_cache",
424
+ "_mkcache",
425
+ "clear_cache",
426
+ "clear_expired_cache",
427
+ "pop_from_cache",
428
+ "local_file",
429
+ "_paths_from_path",
430
+ "get_mapper",
431
+ "open_many",
432
+ "commit_many",
433
+ "hash_name",
434
+ "__hash__",
435
+ "__eq__",
436
+ "to_json",
437
+ "to_dict",
438
+ "cache_size",
439
+ "pipe_file",
440
+ "pipe",
441
+ "start_transaction",
442
+ "end_transaction",
443
+ }:
444
+ # all the methods defined in this class. Note `open` here, since
445
+ # it calls `_open`, but is actually in superclass
446
+ return lambda *args, **kw: getattr(type(self), item).__get__(self)(
447
+ *args, **kw
448
+ )
449
+ if item in ["__reduce_ex__"]:
450
+ raise AttributeError
451
+ if item in ["transaction"]:
452
+ # property
453
+ return type(self).transaction.__get__(self)
454
+ if item in ["_cache", "transaction_type"]:
455
+ # class attributes
456
+ return getattr(type(self), item)
457
+ if item == "__class__":
458
+ return type(self)
459
+ d = object.__getattribute__(self, "__dict__")
460
+ fs = d.get("fs", None) # fs is not immediately defined
461
+ if item in d:
462
+ return d[item]
463
+ elif fs is not None:
464
+ if item in fs.__dict__:
465
+ # attribute of instance
466
+ return fs.__dict__[item]
467
+ # attributed belonging to the target filesystem
468
+ cls = type(fs)
469
+ m = getattr(cls, item)
470
+ if (inspect.isfunction(m) or inspect.isdatadescriptor(m)) and (
471
+ not hasattr(m, "__self__") or m.__self__ is None
472
+ ):
473
+ # instance method
474
+ return m.__get__(fs, cls)
475
+ return m # class method or attribute
476
+ else:
477
+ # attributes of the superclass, while target is being set up
478
+ return super().__getattribute__(item)
479
+
480
+ def __eq__(self, other):
481
+ """Test for equality."""
482
+ if self is other:
483
+ return True
484
+ if not isinstance(other, type(self)):
485
+ return False
486
+ return (
487
+ self.storage == other.storage
488
+ and self.kwargs == other.kwargs
489
+ and self.cache_check == other.cache_check
490
+ and self.check_files == other.check_files
491
+ and self.expiry == other.expiry
492
+ and self.compression == other.compression
493
+ and self._mapper == other._mapper
494
+ and self.target_protocol == other.target_protocol
495
+ )
496
+
497
+ def __hash__(self):
498
+ """Calculate hash."""
499
+ return (
500
+ hash(tuple(self.storage))
501
+ ^ hash(str(self.kwargs))
502
+ ^ hash(self.cache_check)
503
+ ^ hash(self.check_files)
504
+ ^ hash(self.expiry)
505
+ ^ hash(self.compression)
506
+ ^ hash(self._mapper)
507
+ ^ hash(self.target_protocol)
508
+ )
509
+
510
+
511
+ class WholeFileCacheFileSystem(CachingFileSystem):
512
+ """Caches whole remote files on first access
513
+
514
+ This class is intended as a layer over any other file system, and
515
+ will make a local copy of each file accessed, so that all subsequent
516
+ reads are local. This is similar to ``CachingFileSystem``, but without
517
+ the block-wise functionality and so can work even when sparse files
518
+ are not allowed. See its docstring for definition of the init
519
+ arguments.
520
+
521
+ The class still needs access to the remote store for listing files,
522
+ and may refresh cached files.
523
+ """
524
+
525
+ protocol = "filecache"
526
+ local_file = True
527
+
528
+ def open_many(self, open_files, **kwargs):
529
+ paths = [of.path for of in open_files]
530
+ if "r" in open_files.mode:
531
+ self._mkcache()
532
+ else:
533
+ return [
534
+ LocalTempFile(
535
+ self.fs,
536
+ path,
537
+ mode=open_files.mode,
538
+ fn=os.path.join(self.storage[-1], self._mapper(path)),
539
+ **kwargs,
540
+ )
541
+ for path in paths
542
+ ]
543
+
544
+ if self.compression:
545
+ raise NotImplementedError
546
+ details = [self._check_file(sp) for sp in paths]
547
+ downpath = [p for p, d in zip(paths, details) if not d]
548
+ downfn0 = [
549
+ os.path.join(self.storage[-1], self._mapper(p))
550
+ for p, d in zip(paths, details)
551
+ ] # keep these path names for opening later
552
+ downfn = [fn for fn, d in zip(downfn0, details) if not d]
553
+ if downpath:
554
+ # skip if all files are already cached and up to date
555
+ self.fs.get(downpath, downfn)
556
+
557
+ # update metadata - only happens when downloads are successful
558
+ newdetail = [
559
+ {
560
+ "original": path,
561
+ "fn": self._mapper(path),
562
+ "blocks": True,
563
+ "time": time.time(),
564
+ "uid": self.fs.ukey(path),
565
+ }
566
+ for path in downpath
567
+ ]
568
+ for path, detail in zip(downpath, newdetail):
569
+ self._metadata.update_file(path, detail)
570
+ self.save_cache()
571
+
572
+ def firstpart(fn):
573
+ # helper to adapt both whole-file and simple-cache
574
+ return fn[1] if isinstance(fn, tuple) else fn
575
+
576
+ return [
577
+ open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode)
578
+ for fn0, fn1 in zip(details, downfn0)
579
+ ]
580
+
581
+ def commit_many(self, open_files):
582
+ self.fs.put([f.fn for f in open_files], [f.path for f in open_files])
583
+ [f.close() for f in open_files]
584
+ for f in open_files:
585
+ # in case autocommit is off, and so close did not already delete
586
+ try:
587
+ os.remove(f.name)
588
+ except FileNotFoundError:
589
+ pass
590
+ self._cache_size = None
591
+
592
+ def _make_local_details(self, path):
593
+ hash = self._mapper(path)
594
+ fn = os.path.join(self.storage[-1], hash)
595
+ detail = {
596
+ "original": path,
597
+ "fn": hash,
598
+ "blocks": True,
599
+ "time": time.time(),
600
+ "uid": self.fs.ukey(path),
601
+ }
602
+ self._metadata.update_file(path, detail)
603
+ logger.debug("Copying %s to local cache", path)
604
+ return fn
605
+
606
+ def cat(
607
+ self,
608
+ path,
609
+ recursive=False,
610
+ on_error="raise",
611
+ callback=DEFAULT_CALLBACK,
612
+ **kwargs,
613
+ ):
614
+ paths = self.expand_path(
615
+ path, recursive=recursive, maxdepth=kwargs.get("maxdepth")
616
+ )
617
+ getpaths = []
618
+ storepaths = []
619
+ fns = []
620
+ out = {}
621
+ for p in paths.copy():
622
+ try:
623
+ detail = self._check_file(p)
624
+ if not detail:
625
+ fn = self._make_local_details(p)
626
+ getpaths.append(p)
627
+ storepaths.append(fn)
628
+ else:
629
+ detail, fn = detail if isinstance(detail, tuple) else (None, detail)
630
+ fns.append(fn)
631
+ except Exception as e:
632
+ if on_error == "raise":
633
+ raise
634
+ if on_error == "return":
635
+ out[p] = e
636
+ paths.remove(p)
637
+
638
+ if getpaths:
639
+ self.fs.get(getpaths, storepaths)
640
+ self.save_cache()
641
+
642
+ callback.set_size(len(paths))
643
+ for p, fn in zip(paths, fns):
644
+ with open(fn, "rb") as f:
645
+ out[p] = f.read()
646
+ callback.relative_update(1)
647
+ if isinstance(path, str) and len(paths) == 1 and recursive is False:
648
+ out = out[paths[0]]
649
+ return out
650
+
651
+ def _open(self, path, mode="rb", **kwargs):
652
+ path = self._strip_protocol(path)
653
+ if "r" not in mode:
654
+ hash = self._mapper(path)
655
+ fn = os.path.join(self.storage[-1], hash)
656
+ user_specified_kwargs = {
657
+ k: v
658
+ for k, v in kwargs.items()
659
+ # those kwargs were added by open(), we don't want them
660
+ if k not in ["autocommit", "block_size", "cache_options"]
661
+ }
662
+ return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)
663
+ detail = self._check_file(path)
664
+ if detail:
665
+ detail, fn = detail
666
+ _, blocks = detail["fn"], detail["blocks"]
667
+ if blocks is True:
668
+ logger.debug("Opening local copy of %s", path)
669
+
670
+ # In order to support downstream filesystems to be able to
671
+ # infer the compression from the original filename, like
672
+ # the `TarFileSystem`, let's extend the `io.BufferedReader`
673
+ # fileobject protocol by adding a dedicated attribute
674
+ # `original`.
675
+ f = open(fn, mode)
676
+ f.original = detail.get("original")
677
+ return f
678
+ else:
679
+ raise ValueError(
680
+ f"Attempt to open partially cached file {path}"
681
+ f" as a wholly cached file"
682
+ )
683
+ else:
684
+ fn = self._make_local_details(path)
685
+ kwargs["mode"] = mode
686
+
687
+ # call target filesystems open
688
+ self._mkcache()
689
+ if self.compression:
690
+ with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
691
+ if isinstance(f, AbstractBufferedFile):
692
+ # want no type of caching if just downloading whole thing
693
+ f.cache = BaseCache(0, f.cache.fetcher, f.size)
694
+ comp = (
695
+ infer_compression(path)
696
+ if self.compression == "infer"
697
+ else self.compression
698
+ )
699
+ f = compr[comp](f, mode="rb")
700
+ data = True
701
+ while data:
702
+ block = getattr(f, "blocksize", 5 * 2**20)
703
+ data = f.read(block)
704
+ f2.write(data)
705
+ else:
706
+ self.fs.get_file(path, fn)
707
+ self.save_cache()
708
+ return self._open(path, mode)
709
+
710
+
711
+ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
712
+ """Caches whole remote files on first access
713
+
714
+ This class is intended as a layer over any other file system, and
715
+ will make a local copy of each file accessed, so that all subsequent
716
+ reads are local. This implementation only copies whole files, and
717
+ does not keep any metadata about the download time or file details.
718
+ It is therefore safer to use in multi-threaded/concurrent situations.
719
+
720
+ This is the only of the caching filesystems that supports write: you will
721
+ be given a real local open file, and upon close and commit, it will be
722
+ uploaded to the target filesystem; the writability or the target URL is
723
+ not checked until that time.
724
+
725
+ """
726
+
727
+ protocol = "simplecache"
728
+ local_file = True
729
+ transaction_type = WriteCachedTransaction
730
+
731
+ def __init__(self, **kwargs):
732
+ kw = kwargs.copy()
733
+ for key in ["cache_check", "expiry_time", "check_files"]:
734
+ kw[key] = False
735
+ super().__init__(**kw)
736
+ for storage in self.storage:
737
+ if not os.path.exists(storage):
738
+ os.makedirs(storage, exist_ok=True)
739
+
740
+ def _check_file(self, path):
741
+ self._check_cache()
742
+ sha = self._mapper(path)
743
+ for storage in self.storage:
744
+ fn = os.path.join(storage, sha)
745
+ if os.path.exists(fn):
746
+ return fn
747
+
748
+ def save_cache(self):
749
+ pass
750
+
751
+ def load_cache(self):
752
+ pass
753
+
754
+ def pipe_file(self, path, value=None, **kwargs):
755
+ if self._intrans:
756
+ with self.open(path, "wb") as f:
757
+ f.write(value)
758
+ else:
759
+ super().pipe_file(path, value)
760
+
761
+ def ls(self, path, detail=True, **kwargs):
762
+ path = self._strip_protocol(path)
763
+ details = []
764
+ try:
765
+ details = self.fs.ls(
766
+ path, detail=True, **kwargs
767
+ ).copy() # don't edit original!
768
+ except FileNotFoundError as e:
769
+ ex = e
770
+ else:
771
+ ex = None
772
+ if self._intrans:
773
+ path1 = path.rstrip("/") + "/"
774
+ for f in self.transaction.files:
775
+ if f.path == path:
776
+ details.append(
777
+ {"name": path, "size": f.size or f.tell(), "type": "file"}
778
+ )
779
+ elif f.path.startswith(path1):
780
+ if f.path.count("/") == path1.count("/"):
781
+ details.append(
782
+ {"name": f.path, "size": f.size or f.tell(), "type": "file"}
783
+ )
784
+ else:
785
+ dname = "/".join(f.path.split("/")[: path1.count("/") + 1])
786
+ details.append({"name": dname, "size": 0, "type": "directory"})
787
+ if ex is not None and not details:
788
+ raise ex
789
+ if detail:
790
+ return details
791
+ return sorted(_["name"] for _ in details)
792
+
793
+ def info(self, path, **kwargs):
794
+ path = self._strip_protocol(path)
795
+ if self._intrans:
796
+ f = [_ for _ in self.transaction.files if _.path == path]
797
+ if f:
798
+ size = os.path.getsize(f[0].fn) if f[0].closed else f[0].tell()
799
+ return {"name": path, "size": size, "type": "file"}
800
+ f = any(_.path.startswith(path + "/") for _ in self.transaction.files)
801
+ if f:
802
+ return {"name": path, "size": 0, "type": "directory"}
803
+ return self.fs.info(path, **kwargs)
804
+
805
+ def pipe(self, path, value=None, **kwargs):
806
+ if isinstance(path, str):
807
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
808
+ elif isinstance(path, dict):
809
+ for k, v in path.items():
810
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
811
+ else:
812
+ raise ValueError("path must be str or dict")
813
+
814
+ def cat_ranges(
815
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
816
+ ):
817
+ lpaths = [self._check_file(p) for p in paths]
818
+ rpaths = [p for l, p in zip(lpaths, paths) if l is False]
819
+ lpaths = [l for l, p in zip(lpaths, paths) if l is False]
820
+ self.fs.get(rpaths, lpaths)
821
+ return super().cat_ranges(
822
+ paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
823
+ )
824
+
825
+ def _open(self, path, mode="rb", **kwargs):
826
+ path = self._strip_protocol(path)
827
+ sha = self._mapper(path)
828
+
829
+ if "r" not in mode:
830
+ fn = os.path.join(self.storage[-1], sha)
831
+ user_specified_kwargs = {
832
+ k: v
833
+ for k, v in kwargs.items()
834
+ if k not in ["autocommit", "block_size", "cache_options"]
835
+ } # those were added by open()
836
+ return LocalTempFile(
837
+ self,
838
+ path,
839
+ mode=mode,
840
+ autocommit=not self._intrans,
841
+ fn=fn,
842
+ **user_specified_kwargs,
843
+ )
844
+ fn = self._check_file(path)
845
+ if fn:
846
+ return open(fn, mode)
847
+
848
+ fn = os.path.join(self.storage[-1], sha)
849
+ logger.debug("Copying %s to local cache", path)
850
+ kwargs["mode"] = mode
851
+
852
+ self._mkcache()
853
+ self._cache_size = None
854
+ if self.compression:
855
+ with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
856
+ if isinstance(f, AbstractBufferedFile):
857
+ # want no type of caching if just downloading whole thing
858
+ f.cache = BaseCache(0, f.cache.fetcher, f.size)
859
+ comp = (
860
+ infer_compression(path)
861
+ if self.compression == "infer"
862
+ else self.compression
863
+ )
864
+ f = compr[comp](f, mode="rb")
865
+ data = True
866
+ while data:
867
+ block = getattr(f, "blocksize", 5 * 2**20)
868
+ data = f.read(block)
869
+ f2.write(data)
870
+ else:
871
+ self.fs.get_file(path, fn)
872
+ return self._open(path, mode)
873
+
874
+
875
+ class LocalTempFile:
876
+ """A temporary local file, which will be uploaded on commit"""
877
+
878
+ def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
879
+ self.fn = fn
880
+ self.fh = open(fn, mode)
881
+ self.mode = mode
882
+ if seek:
883
+ self.fh.seek(seek)
884
+ self.path = path
885
+ self.size = None
886
+ self.fs = fs
887
+ self.closed = False
888
+ self.autocommit = autocommit
889
+ self.kwargs = kwargs
890
+
891
+ def __reduce__(self):
892
+ # always open in r+b to allow continuing writing at a location
893
+ return (
894
+ LocalTempFile,
895
+ (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
896
+ )
897
+
898
+ def __enter__(self):
899
+ return self.fh
900
+
901
+ def __exit__(self, exc_type, exc_val, exc_tb):
902
+ self.close()
903
+
904
+ def close(self):
905
+ # self.size = self.fh.tell()
906
+ if self.closed:
907
+ return
908
+ self.fh.close()
909
+ self.closed = True
910
+ if self.autocommit:
911
+ self.commit()
912
+
913
+ def discard(self):
914
+ self.fh.close()
915
+ os.remove(self.fn)
916
+
917
+ def commit(self):
918
+ self.fs.put(self.fn, self.path, **self.kwargs)
919
+ # we do not delete local copy - it's still in the cache
920
+
921
+ @property
922
+ def name(self):
923
+ return self.fn
924
+
925
+ def __repr__(self) -> str:
926
+ return f"LocalTempFile: {self.path}"
927
+
928
+ def __getattr__(self, item):
929
+ return getattr(self.fh, item)