Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\joblib\memory.py with huggingface_hub
Browse files
edit//Qwen3-TTS-test//.venv//Lib//site-packages//joblib//memory.py
ADDED
|
@@ -0,0 +1,1242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
A context object for caching a function's return value each time it
|
| 3 |
+
is called with the same input arguments.
|
| 4 |
+
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
|
| 8 |
+
# Copyright (c) 2009 Gael Varoquaux
|
| 9 |
+
# License: BSD Style, 3 clauses.
|
| 10 |
+
|
| 11 |
+
import asyncio
|
| 12 |
+
import datetime
|
| 13 |
+
import functools
|
| 14 |
+
import inspect
|
| 15 |
+
import logging
|
| 16 |
+
import os
|
| 17 |
+
import pathlib
|
| 18 |
+
import pydoc
|
| 19 |
+
import re
|
| 20 |
+
import textwrap
|
| 21 |
+
import time
|
| 22 |
+
import tokenize
|
| 23 |
+
import traceback
|
| 24 |
+
import warnings
|
| 25 |
+
import weakref
|
| 26 |
+
|
| 27 |
+
from . import hashing
|
| 28 |
+
from ._store_backends import (
|
| 29 |
+
CacheWarning, # noqa
|
| 30 |
+
FileSystemStoreBackend,
|
| 31 |
+
StoreBackendBase,
|
| 32 |
+
)
|
| 33 |
+
from .func_inspect import (
|
| 34 |
+
filter_args,
|
| 35 |
+
format_call,
|
| 36 |
+
format_signature,
|
| 37 |
+
get_func_code,
|
| 38 |
+
get_func_name,
|
| 39 |
+
)
|
| 40 |
+
from .logger import Logger, format_time, pformat
|
| 41 |
+
|
| 42 |
+
FIRST_LINE_TEXT = "# first line:"
|
| 43 |
+
|
| 44 |
+
# TODO: The following object should have a data store object as a sub
|
| 45 |
+
# object, and the interface to persist and query should be separated in
|
| 46 |
+
# the data store.
|
| 47 |
+
#
|
| 48 |
+
# This would enable creating 'Memory' objects with a different logic for
|
| 49 |
+
# pickling that would simply span a MemorizedFunc with the same
|
| 50 |
+
# store (or do we want to copy it to avoid cross-talks?), for instance to
|
| 51 |
+
# implement HDF5 pickling.
|
| 52 |
+
|
| 53 |
+
# TODO: Same remark for the logger, and probably use the Python logging
|
| 54 |
+
# mechanism.
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def extract_first_line(func_code):
|
| 58 |
+
"""Extract the first line information from the function code
|
| 59 |
+
text if available.
|
| 60 |
+
"""
|
| 61 |
+
if func_code.startswith(FIRST_LINE_TEXT):
|
| 62 |
+
func_code = func_code.split("\n")
|
| 63 |
+
first_line = int(func_code[0][len(FIRST_LINE_TEXT) :])
|
| 64 |
+
func_code = "\n".join(func_code[1:])
|
| 65 |
+
else:
|
| 66 |
+
first_line = -1
|
| 67 |
+
return func_code, first_line
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class JobLibCollisionWarning(UserWarning):
|
| 71 |
+
"""Warn that there might be a collision between names of functions."""
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
_STORE_BACKENDS = {"local": FileSystemStoreBackend}
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def register_store_backend(backend_name, backend):
|
| 78 |
+
"""Extend available store backends.
|
| 79 |
+
|
| 80 |
+
The Memory, MemorizeResult and MemorizeFunc objects are designed to be
|
| 81 |
+
agnostic to the type of store used behind. By default, the local file
|
| 82 |
+
system is used but this function gives the possibility to extend joblib's
|
| 83 |
+
memory pattern with other types of storage such as cloud storage (S3, GCS,
|
| 84 |
+
OpenStack, HadoopFS, etc) or blob DBs.
|
| 85 |
+
|
| 86 |
+
Parameters
|
| 87 |
+
----------
|
| 88 |
+
backend_name: str
|
| 89 |
+
The name identifying the store backend being registered. For example,
|
| 90 |
+
'local' is used with FileSystemStoreBackend.
|
| 91 |
+
backend: StoreBackendBase subclass
|
| 92 |
+
The name of a class that implements the StoreBackendBase interface.
|
| 93 |
+
|
| 94 |
+
"""
|
| 95 |
+
if not isinstance(backend_name, str):
|
| 96 |
+
raise ValueError(
|
| 97 |
+
"Store backend name should be a string, '{0}' given.".format(backend_name)
|
| 98 |
+
)
|
| 99 |
+
if backend is None or not issubclass(backend, StoreBackendBase):
|
| 100 |
+
raise ValueError(
|
| 101 |
+
"Store backend should inherit StoreBackendBase, '{0}' given.".format(
|
| 102 |
+
backend
|
| 103 |
+
)
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
_STORE_BACKENDS[backend_name] = backend
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def _store_backend_factory(backend, location, verbose=0, backend_options=None):
|
| 110 |
+
"""Return the correct store object for the given location."""
|
| 111 |
+
if backend_options is None:
|
| 112 |
+
backend_options = {}
|
| 113 |
+
|
| 114 |
+
if isinstance(location, pathlib.Path):
|
| 115 |
+
location = str(location)
|
| 116 |
+
|
| 117 |
+
if isinstance(location, StoreBackendBase):
|
| 118 |
+
return location
|
| 119 |
+
elif isinstance(location, str):
|
| 120 |
+
obj = None
|
| 121 |
+
location = os.path.expanduser(location)
|
| 122 |
+
# The location is not a local file system, we look in the
|
| 123 |
+
# registered backends if there's one matching the given backend
|
| 124 |
+
# name.
|
| 125 |
+
for backend_key, backend_obj in _STORE_BACKENDS.items():
|
| 126 |
+
if backend == backend_key:
|
| 127 |
+
obj = backend_obj()
|
| 128 |
+
|
| 129 |
+
# By default, we assume the FileSystemStoreBackend can be used if no
|
| 130 |
+
# matching backend could be found.
|
| 131 |
+
if obj is None:
|
| 132 |
+
raise TypeError(
|
| 133 |
+
"Unknown location {0} or backend {1}".format(location, backend)
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
# The store backend is configured with the extra named parameters,
|
| 137 |
+
# some of them are specific to the underlying store backend.
|
| 138 |
+
obj.configure(location, verbose=verbose, backend_options=backend_options)
|
| 139 |
+
return obj
|
| 140 |
+
elif location is not None:
|
| 141 |
+
warnings.warn(
|
| 142 |
+
"Instantiating a backend using a {} as a location is not "
|
| 143 |
+
"supported by joblib. Returning None instead.".format(
|
| 144 |
+
location.__class__.__name__
|
| 145 |
+
),
|
| 146 |
+
UserWarning,
|
| 147 |
+
)
|
| 148 |
+
|
| 149 |
+
return None
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def _build_func_identifier(func):
|
| 153 |
+
"""Build a roughly unique identifier for the cached function."""
|
| 154 |
+
modules, funcname = get_func_name(func)
|
| 155 |
+
# We reuse historical fs-like way of building a function identifier
|
| 156 |
+
return os.path.join(*modules, funcname)
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
# An in-memory store to avoid looking at the disk-based function
|
| 160 |
+
# source code to check if a function definition has changed
|
| 161 |
+
_FUNCTION_HASHES = weakref.WeakKeyDictionary()
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
###############################################################################
|
| 165 |
+
# class `MemorizedResult`
|
| 166 |
+
###############################################################################
|
| 167 |
+
class MemorizedResult(Logger):
|
| 168 |
+
"""Object representing a cached value.
|
| 169 |
+
|
| 170 |
+
Attributes
|
| 171 |
+
----------
|
| 172 |
+
location: str
|
| 173 |
+
The location of joblib cache. Depends on the store backend used.
|
| 174 |
+
|
| 175 |
+
func: function or str
|
| 176 |
+
function whose output is cached. The string case is intended only for
|
| 177 |
+
instantiation based on the output of repr() on another instance.
|
| 178 |
+
(namely eval(repr(memorized_instance)) works).
|
| 179 |
+
|
| 180 |
+
argument_hash: str
|
| 181 |
+
hash of the function arguments.
|
| 182 |
+
|
| 183 |
+
backend: str
|
| 184 |
+
Type of store backend for reading/writing cache files.
|
| 185 |
+
Default is 'local'.
|
| 186 |
+
|
| 187 |
+
mmap_mode: {None, 'r+', 'r', 'w+', 'c'}
|
| 188 |
+
The memmapping mode used when loading from cache numpy arrays. See
|
| 189 |
+
numpy.load for the meaning of the different values.
|
| 190 |
+
|
| 191 |
+
verbose: int
|
| 192 |
+
verbosity level (0 means no message).
|
| 193 |
+
|
| 194 |
+
timestamp, metadata: string
|
| 195 |
+
for internal use only.
|
| 196 |
+
"""
|
| 197 |
+
|
| 198 |
+
def __init__(
|
| 199 |
+
self,
|
| 200 |
+
location,
|
| 201 |
+
call_id,
|
| 202 |
+
backend="local",
|
| 203 |
+
mmap_mode=None,
|
| 204 |
+
verbose=0,
|
| 205 |
+
timestamp=None,
|
| 206 |
+
metadata=None,
|
| 207 |
+
):
|
| 208 |
+
Logger.__init__(self)
|
| 209 |
+
self._call_id = call_id
|
| 210 |
+
self.store_backend = _store_backend_factory(backend, location, verbose=verbose)
|
| 211 |
+
self.mmap_mode = mmap_mode
|
| 212 |
+
|
| 213 |
+
if metadata is not None:
|
| 214 |
+
self.metadata = metadata
|
| 215 |
+
else:
|
| 216 |
+
self.metadata = self.store_backend.get_metadata(self._call_id)
|
| 217 |
+
|
| 218 |
+
self.duration = self.metadata.get("duration", None)
|
| 219 |
+
self.verbose = verbose
|
| 220 |
+
self.timestamp = timestamp
|
| 221 |
+
|
| 222 |
+
@property
|
| 223 |
+
def func(self):
|
| 224 |
+
return self.func_id
|
| 225 |
+
|
| 226 |
+
@property
|
| 227 |
+
def func_id(self):
|
| 228 |
+
return self._call_id[0]
|
| 229 |
+
|
| 230 |
+
@property
|
| 231 |
+
def args_id(self):
|
| 232 |
+
return self._call_id[1]
|
| 233 |
+
|
| 234 |
+
def get(self):
|
| 235 |
+
"""Read value from cache and return it."""
|
| 236 |
+
try:
|
| 237 |
+
return self.store_backend.load_item(
|
| 238 |
+
self._call_id,
|
| 239 |
+
timestamp=self.timestamp,
|
| 240 |
+
metadata=self.metadata,
|
| 241 |
+
verbose=self.verbose,
|
| 242 |
+
)
|
| 243 |
+
except ValueError as exc:
|
| 244 |
+
new_exc = KeyError(
|
| 245 |
+
"Error while trying to load a MemorizedResult's value. "
|
| 246 |
+
"It seems that this folder is corrupted : {}".format(
|
| 247 |
+
os.path.join(self.store_backend.location, *self._call_id)
|
| 248 |
+
)
|
| 249 |
+
)
|
| 250 |
+
raise new_exc from exc
|
| 251 |
+
|
| 252 |
+
def clear(self):
|
| 253 |
+
"""Clear value from cache"""
|
| 254 |
+
self.store_backend.clear_item(self._call_id)
|
| 255 |
+
|
| 256 |
+
def __repr__(self):
|
| 257 |
+
return '{}(location="{}", func="{}", args_id="{}")'.format(
|
| 258 |
+
self.__class__.__name__, self.store_backend.location, *self._call_id
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
def __getstate__(self):
|
| 262 |
+
state = self.__dict__.copy()
|
| 263 |
+
state["timestamp"] = None
|
| 264 |
+
return state
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
class NotMemorizedResult(object):
|
| 268 |
+
"""Class representing an arbitrary value.
|
| 269 |
+
|
| 270 |
+
This class is a replacement for MemorizedResult when there is no cache.
|
| 271 |
+
"""
|
| 272 |
+
|
| 273 |
+
__slots__ = ("value", "valid")
|
| 274 |
+
|
| 275 |
+
def __init__(self, value):
|
| 276 |
+
self.value = value
|
| 277 |
+
self.valid = True
|
| 278 |
+
|
| 279 |
+
def get(self):
|
| 280 |
+
if self.valid:
|
| 281 |
+
return self.value
|
| 282 |
+
else:
|
| 283 |
+
raise KeyError("No value stored.")
|
| 284 |
+
|
| 285 |
+
def clear(self):
|
| 286 |
+
self.valid = False
|
| 287 |
+
self.value = None
|
| 288 |
+
|
| 289 |
+
def __repr__(self):
|
| 290 |
+
if self.valid:
|
| 291 |
+
return "{class_name}({value})".format(
|
| 292 |
+
class_name=self.__class__.__name__, value=pformat(self.value)
|
| 293 |
+
)
|
| 294 |
+
else:
|
| 295 |
+
return self.__class__.__name__ + " with no value"
|
| 296 |
+
|
| 297 |
+
# __getstate__ and __setstate__ are required because of __slots__
|
| 298 |
+
def __getstate__(self):
|
| 299 |
+
return {"valid": self.valid, "value": self.value}
|
| 300 |
+
|
| 301 |
+
def __setstate__(self, state):
|
| 302 |
+
self.valid = state["valid"]
|
| 303 |
+
self.value = state["value"]
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
###############################################################################
|
| 307 |
+
# class `NotMemorizedFunc`
|
| 308 |
+
###############################################################################
|
| 309 |
+
class NotMemorizedFunc(object):
|
| 310 |
+
"""No-op object decorating a function.
|
| 311 |
+
|
| 312 |
+
This class replaces MemorizedFunc when there is no cache. It provides an
|
| 313 |
+
identical API but does not write anything on disk.
|
| 314 |
+
|
| 315 |
+
Attributes
|
| 316 |
+
----------
|
| 317 |
+
func: callable
|
| 318 |
+
Original undecorated function.
|
| 319 |
+
"""
|
| 320 |
+
|
| 321 |
+
# Should be a light as possible (for speed)
|
| 322 |
+
def __init__(self, func):
|
| 323 |
+
self.func = func
|
| 324 |
+
|
| 325 |
+
def __call__(self, *args, **kwargs):
|
| 326 |
+
return self.func(*args, **kwargs)
|
| 327 |
+
|
| 328 |
+
def call_and_shelve(self, *args, **kwargs):
|
| 329 |
+
return NotMemorizedResult(self.func(*args, **kwargs))
|
| 330 |
+
|
| 331 |
+
def __repr__(self):
|
| 332 |
+
return "{0}(func={1})".format(self.__class__.__name__, self.func)
|
| 333 |
+
|
| 334 |
+
def clear(self, warn=True):
|
| 335 |
+
# Argument "warn" is for compatibility with MemorizedFunc.clear
|
| 336 |
+
pass
|
| 337 |
+
|
| 338 |
+
def call(self, *args, **kwargs):
|
| 339 |
+
return self.func(*args, **kwargs), {}
|
| 340 |
+
|
| 341 |
+
def check_call_in_cache(self, *args, **kwargs):
|
| 342 |
+
return False
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
###############################################################################
|
| 346 |
+
# class `AsyncNotMemorizedFunc`
|
| 347 |
+
###############################################################################
|
| 348 |
+
class AsyncNotMemorizedFunc(NotMemorizedFunc):
|
| 349 |
+
async def call_and_shelve(self, *args, **kwargs):
|
| 350 |
+
return NotMemorizedResult(await self.func(*args, **kwargs))
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
###############################################################################
|
| 354 |
+
# class `MemorizedFunc`
|
| 355 |
+
###############################################################################
|
| 356 |
+
class MemorizedFunc(Logger):
|
| 357 |
+
"""Callable object decorating a function for caching its return value
|
| 358 |
+
each time it is called.
|
| 359 |
+
|
| 360 |
+
Methods are provided to inspect the cache or clean it.
|
| 361 |
+
|
| 362 |
+
Attributes
|
| 363 |
+
----------
|
| 364 |
+
func: callable
|
| 365 |
+
The original, undecorated, function.
|
| 366 |
+
|
| 367 |
+
location: string
|
| 368 |
+
The location of joblib cache. Depends on the store backend used.
|
| 369 |
+
|
| 370 |
+
backend: str
|
| 371 |
+
Type of store backend for reading/writing cache files.
|
| 372 |
+
Default is 'local', in which case the location is the path to a
|
| 373 |
+
disk storage.
|
| 374 |
+
|
| 375 |
+
ignore: list or None
|
| 376 |
+
List of variable names to ignore when choosing whether to
|
| 377 |
+
recompute.
|
| 378 |
+
|
| 379 |
+
mmap_mode: {None, 'r+', 'r', 'w+', 'c'}
|
| 380 |
+
The memmapping mode used when loading from cache
|
| 381 |
+
numpy arrays. See numpy.load for the meaning of the different
|
| 382 |
+
values.
|
| 383 |
+
|
| 384 |
+
compress: boolean, or integer
|
| 385 |
+
Whether to zip the stored data on disk. If an integer is
|
| 386 |
+
given, it should be between 1 and 9, and sets the amount
|
| 387 |
+
of compression. Note that compressed arrays cannot be
|
| 388 |
+
read by memmapping.
|
| 389 |
+
|
| 390 |
+
verbose: int, optional
|
| 391 |
+
The verbosity flag, controls messages that are issued as
|
| 392 |
+
the function is evaluated.
|
| 393 |
+
|
| 394 |
+
cache_validation_callback: callable, optional
|
| 395 |
+
Callable to check if a result in cache is valid or is to be recomputed.
|
| 396 |
+
When the function is called with arguments for which a cache exists,
|
| 397 |
+
the callback is called with the cache entry's metadata as its sole
|
| 398 |
+
argument. If it returns True, the cached result is returned, else the
|
| 399 |
+
cache for these arguments is cleared and the result is recomputed.
|
| 400 |
+
"""
|
| 401 |
+
|
| 402 |
+
# ------------------------------------------------------------------------
|
| 403 |
+
# Public interface
|
| 404 |
+
# ------------------------------------------------------------------------
|
| 405 |
+
|
| 406 |
+
def __init__(
|
| 407 |
+
self,
|
| 408 |
+
func,
|
| 409 |
+
location,
|
| 410 |
+
backend="local",
|
| 411 |
+
ignore=None,
|
| 412 |
+
mmap_mode=None,
|
| 413 |
+
compress=False,
|
| 414 |
+
verbose=1,
|
| 415 |
+
timestamp=None,
|
| 416 |
+
cache_validation_callback=None,
|
| 417 |
+
):
|
| 418 |
+
Logger.__init__(self)
|
| 419 |
+
self.mmap_mode = mmap_mode
|
| 420 |
+
self.compress = compress
|
| 421 |
+
self.func = func
|
| 422 |
+
self.cache_validation_callback = cache_validation_callback
|
| 423 |
+
self.func_id = _build_func_identifier(func)
|
| 424 |
+
self.ignore = ignore if ignore is not None else []
|
| 425 |
+
self._verbose = verbose
|
| 426 |
+
|
| 427 |
+
# retrieve store object from backend type and location.
|
| 428 |
+
self.store_backend = _store_backend_factory(
|
| 429 |
+
backend,
|
| 430 |
+
location,
|
| 431 |
+
verbose=verbose,
|
| 432 |
+
backend_options=dict(compress=compress, mmap_mode=mmap_mode),
|
| 433 |
+
)
|
| 434 |
+
if self.store_backend is not None:
|
| 435 |
+
# Create func directory on demand.
|
| 436 |
+
self.store_backend.store_cached_func_code([self.func_id])
|
| 437 |
+
|
| 438 |
+
self.timestamp = timestamp if timestamp is not None else time.time()
|
| 439 |
+
try:
|
| 440 |
+
functools.update_wrapper(self, func)
|
| 441 |
+
except Exception:
|
| 442 |
+
pass # Objects like ufunc don't like that
|
| 443 |
+
if inspect.isfunction(func):
|
| 444 |
+
doc = pydoc.TextDoc().document(func)
|
| 445 |
+
# Remove blank line
|
| 446 |
+
doc = doc.replace("\n", "\n\n", 1)
|
| 447 |
+
# Strip backspace-overprints for compatibility with autodoc
|
| 448 |
+
doc = re.sub("\x08.", "", doc)
|
| 449 |
+
else:
|
| 450 |
+
# Pydoc does a poor job on other objects
|
| 451 |
+
doc = func.__doc__
|
| 452 |
+
self.__doc__ = "Memoized version of %s" % doc
|
| 453 |
+
|
| 454 |
+
self._func_code_info = None
|
| 455 |
+
self._func_code_id = None
|
| 456 |
+
|
| 457 |
+
def _is_in_cache_and_valid(self, call_id):
|
| 458 |
+
"""Check if the function call is cached and valid for given arguments.
|
| 459 |
+
|
| 460 |
+
- Compare the function code with the one from the cached function,
|
| 461 |
+
asserting if it has changed.
|
| 462 |
+
- Check if the function call is present in the cache.
|
| 463 |
+
- Call `cache_validation_callback` for user define cache validation.
|
| 464 |
+
|
| 465 |
+
Returns True if the function call is in cache and can be used, and
|
| 466 |
+
returns False otherwise.
|
| 467 |
+
"""
|
| 468 |
+
# Check if the code of the function has changed
|
| 469 |
+
if not self._check_previous_func_code(stacklevel=4):
|
| 470 |
+
return False
|
| 471 |
+
|
| 472 |
+
# Check if this specific call is in the cache
|
| 473 |
+
if not self.store_backend.contains_item(call_id):
|
| 474 |
+
return False
|
| 475 |
+
|
| 476 |
+
# Call the user defined cache validation callback
|
| 477 |
+
metadata = self.store_backend.get_metadata(call_id)
|
| 478 |
+
if (
|
| 479 |
+
self.cache_validation_callback is not None
|
| 480 |
+
and not self.cache_validation_callback(metadata)
|
| 481 |
+
):
|
| 482 |
+
self.store_backend.clear_item(call_id)
|
| 483 |
+
return False
|
| 484 |
+
|
| 485 |
+
return True
|
| 486 |
+
|
| 487 |
+
def _cached_call(self, args, kwargs, shelving):
|
| 488 |
+
"""Call wrapped function and cache result, or read cache if available.
|
| 489 |
+
|
| 490 |
+
This function returns the wrapped function output or a reference to
|
| 491 |
+
the cached result.
|
| 492 |
+
|
| 493 |
+
Arguments:
|
| 494 |
+
----------
|
| 495 |
+
|
| 496 |
+
args, kwargs: list and dict
|
| 497 |
+
input arguments for wrapped function
|
| 498 |
+
|
| 499 |
+
shelving: bool
|
| 500 |
+
True when called via the call_and_shelve function.
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
Returns
|
| 504 |
+
-------
|
| 505 |
+
output: Output of the wrapped function if shelving is false, or a
|
| 506 |
+
MemorizedResult reference to the value if shelving is true.
|
| 507 |
+
metadata: dict containing the metadata associated with the call.
|
| 508 |
+
"""
|
| 509 |
+
args_id = self._get_args_id(*args, **kwargs)
|
| 510 |
+
call_id = (self.func_id, args_id)
|
| 511 |
+
_, func_name = get_func_name(self.func)
|
| 512 |
+
func_info = self.store_backend.get_cached_func_info([self.func_id])
|
| 513 |
+
location = func_info["location"]
|
| 514 |
+
|
| 515 |
+
if self._verbose >= 20:
|
| 516 |
+
logging.basicConfig(level=logging.INFO)
|
| 517 |
+
_, signature = format_signature(self.func, *args, **kwargs)
|
| 518 |
+
self.info(
|
| 519 |
+
textwrap.dedent(
|
| 520 |
+
f"""
|
| 521 |
+
Querying {func_name} with signature
|
| 522 |
+
{signature}.
|
| 523 |
+
|
| 524 |
+
(argument hash {args_id})
|
| 525 |
+
|
| 526 |
+
The store location is {location}.
|
| 527 |
+
"""
|
| 528 |
+
)
|
| 529 |
+
)
|
| 530 |
+
|
| 531 |
+
# Compare the function code with the previous to see if the
|
| 532 |
+
# function code has changed and check if the results are present in
|
| 533 |
+
# the cache.
|
| 534 |
+
if self._is_in_cache_and_valid(call_id):
|
| 535 |
+
if shelving:
|
| 536 |
+
return self._get_memorized_result(call_id), {}
|
| 537 |
+
|
| 538 |
+
try:
|
| 539 |
+
start_time = time.time()
|
| 540 |
+
output = self._load_item(call_id)
|
| 541 |
+
if self._verbose > 4:
|
| 542 |
+
self._print_duration(
|
| 543 |
+
time.time() - start_time, context="cache loaded "
|
| 544 |
+
)
|
| 545 |
+
return output, {}
|
| 546 |
+
except Exception:
|
| 547 |
+
# XXX: Should use an exception logger
|
| 548 |
+
_, signature = format_signature(self.func, *args, **kwargs)
|
| 549 |
+
self.warn(
|
| 550 |
+
"Exception while loading results for {}\n {}".format(
|
| 551 |
+
signature, traceback.format_exc()
|
| 552 |
+
)
|
| 553 |
+
)
|
| 554 |
+
|
| 555 |
+
if self._verbose > 10:
|
| 556 |
+
self.warn(
|
| 557 |
+
f"Computing func {func_name}, argument hash {args_id} "
|
| 558 |
+
f"in location {location}"
|
| 559 |
+
)
|
| 560 |
+
|
| 561 |
+
# Returns the output but not the metadata
|
| 562 |
+
return self._call(call_id, args, kwargs, shelving)
|
| 563 |
+
|
| 564 |
+
@property
|
| 565 |
+
def func_code_info(self):
|
| 566 |
+
# 3-tuple property containing: the function source code, source file,
|
| 567 |
+
# and first line of the code inside the source file
|
| 568 |
+
if hasattr(self.func, "__code__"):
|
| 569 |
+
if self._func_code_id is None:
|
| 570 |
+
self._func_code_id = id(self.func.__code__)
|
| 571 |
+
elif id(self.func.__code__) != self._func_code_id:
|
| 572 |
+
# Be robust to dynamic reassignments of self.func.__code__
|
| 573 |
+
self._func_code_info = None
|
| 574 |
+
|
| 575 |
+
if self._func_code_info is None:
|
| 576 |
+
# Cache the source code of self.func . Provided that get_func_code
|
| 577 |
+
# (which should be called once on self) gets called in the process
|
| 578 |
+
# in which self.func was defined, this caching mechanism prevents
|
| 579 |
+
# undesired cache clearing when the cached function is called in
|
| 580 |
+
# an environment where the introspection utilities get_func_code
|
| 581 |
+
# relies on do not work (typically, in joblib child processes).
|
| 582 |
+
# See #1035 for more info
|
| 583 |
+
# TODO (pierreglaser): do the same with get_func_name?
|
| 584 |
+
self._func_code_info = get_func_code(self.func)
|
| 585 |
+
return self._func_code_info
|
| 586 |
+
|
| 587 |
+
def call_and_shelve(self, *args, **kwargs):
|
| 588 |
+
"""Call wrapped function, cache result and return a reference.
|
| 589 |
+
|
| 590 |
+
This method returns a reference to the cached result instead of the
|
| 591 |
+
result itself. The reference object is small and picklable, allowing
|
| 592 |
+
to send or store it easily. Call .get() on reference object to get
|
| 593 |
+
result.
|
| 594 |
+
|
| 595 |
+
Returns
|
| 596 |
+
-------
|
| 597 |
+
cached_result: MemorizedResult or NotMemorizedResult
|
| 598 |
+
reference to the value returned by the wrapped function. The
|
| 599 |
+
class "NotMemorizedResult" is used when there is no cache
|
| 600 |
+
activated (e.g. location=None in Memory).
|
| 601 |
+
"""
|
| 602 |
+
# Return the wrapped output, without the metadata
|
| 603 |
+
return self._cached_call(args, kwargs, shelving=True)[0]
|
| 604 |
+
|
| 605 |
+
def __call__(self, *args, **kwargs):
|
| 606 |
+
# Return the output, without the metadata
|
| 607 |
+
return self._cached_call(args, kwargs, shelving=False)[0]
|
| 608 |
+
|
| 609 |
+
def __getstate__(self):
|
| 610 |
+
# Make sure self.func's source is introspected prior to being pickled -
|
| 611 |
+
# code introspection utilities typically do not work inside child
|
| 612 |
+
# processes
|
| 613 |
+
_ = self.func_code_info
|
| 614 |
+
|
| 615 |
+
# We don't store the timestamp when pickling, to avoid the hash
|
| 616 |
+
# depending from it.
|
| 617 |
+
state = self.__dict__.copy()
|
| 618 |
+
state["timestamp"] = None
|
| 619 |
+
|
| 620 |
+
# Invalidate the code id as id(obj) will be different in the child
|
| 621 |
+
state["_func_code_id"] = None
|
| 622 |
+
|
| 623 |
+
return state
|
| 624 |
+
|
| 625 |
+
def check_call_in_cache(self, *args, **kwargs):
|
| 626 |
+
"""Check if the function call is cached and valid for given arguments.
|
| 627 |
+
|
| 628 |
+
Does not call the function or do any work besides function inspection
|
| 629 |
+
and argument hashing.
|
| 630 |
+
|
| 631 |
+
- Compare the function code with the one from the cached function,
|
| 632 |
+
asserting if it has changed.
|
| 633 |
+
- Check if the function call is present in the cache.
|
| 634 |
+
- Call `cache_validation_callback` for user define cache validation.
|
| 635 |
+
|
| 636 |
+
Returns
|
| 637 |
+
-------
|
| 638 |
+
is_call_in_cache: bool
|
| 639 |
+
Whether or not the function call is in cache and can be used.
|
| 640 |
+
"""
|
| 641 |
+
call_id = (self.func_id, self._get_args_id(*args, **kwargs))
|
| 642 |
+
return self._is_in_cache_and_valid(call_id)
|
| 643 |
+
|
| 644 |
+
# ------------------------------------------------------------------------
|
| 645 |
+
# Private interface
|
| 646 |
+
# ------------------------------------------------------------------------
|
| 647 |
+
|
| 648 |
+
def _get_args_id(self, *args, **kwargs):
|
| 649 |
+
"""Return the input parameter hash of a result."""
|
| 650 |
+
return hashing.hash(
|
| 651 |
+
filter_args(self.func, self.ignore, args, kwargs),
|
| 652 |
+
coerce_mmap=self.mmap_mode is not None,
|
| 653 |
+
)
|
| 654 |
+
|
| 655 |
+
def _hash_func(self):
|
| 656 |
+
"""Hash a function to key the online cache"""
|
| 657 |
+
func_code_h = hash(getattr(self.func, "__code__", None))
|
| 658 |
+
return id(self.func), hash(self.func), func_code_h
|
| 659 |
+
|
| 660 |
+
def _write_func_code(self, func_code, first_line):
|
| 661 |
+
"""Write the function code and the filename to a file."""
|
| 662 |
+
# We store the first line because the filename and the function
|
| 663 |
+
# name is not always enough to identify a function: people
|
| 664 |
+
# sometimes have several functions named the same way in a
|
| 665 |
+
# file. This is bad practice, but joblib should be robust to bad
|
| 666 |
+
# practice.
|
| 667 |
+
func_code = "%s %i\n%s" % (FIRST_LINE_TEXT, first_line, func_code)
|
| 668 |
+
self.store_backend.store_cached_func_code([self.func_id], func_code)
|
| 669 |
+
|
| 670 |
+
# Also store in the in-memory store of function hashes
|
| 671 |
+
is_named_callable = (
|
| 672 |
+
hasattr(self.func, "__name__") and self.func.__name__ != "<lambda>"
|
| 673 |
+
)
|
| 674 |
+
if is_named_callable:
|
| 675 |
+
# Don't do this for lambda functions or strange callable
|
| 676 |
+
# objects, as it ends up being too fragile
|
| 677 |
+
func_hash = self._hash_func()
|
| 678 |
+
try:
|
| 679 |
+
_FUNCTION_HASHES[self.func] = func_hash
|
| 680 |
+
except TypeError:
|
| 681 |
+
# Some callable are not hashable
|
| 682 |
+
pass
|
| 683 |
+
|
| 684 |
+
def _check_previous_func_code(self, stacklevel=2):
|
| 685 |
+
"""
|
| 686 |
+
stacklevel is the depth a which this function is called, to
|
| 687 |
+
issue useful warnings to the user.
|
| 688 |
+
"""
|
| 689 |
+
# First check if our function is in the in-memory store.
|
| 690 |
+
# Using the in-memory store not only makes things faster, but it
|
| 691 |
+
# also renders us robust to variations of the files when the
|
| 692 |
+
# in-memory version of the code does not vary
|
| 693 |
+
try:
|
| 694 |
+
if self.func in _FUNCTION_HASHES:
|
| 695 |
+
# We use as an identifier the id of the function and its
|
| 696 |
+
# hash. This is more likely to falsely change than have hash
|
| 697 |
+
# collisions, thus we are on the safe side.
|
| 698 |
+
func_hash = self._hash_func()
|
| 699 |
+
if func_hash == _FUNCTION_HASHES[self.func]:
|
| 700 |
+
return True
|
| 701 |
+
except TypeError:
|
| 702 |
+
# Some callables are not hashable
|
| 703 |
+
pass
|
| 704 |
+
|
| 705 |
+
# Here, we go through some effort to be robust to dynamically
|
| 706 |
+
# changing code and collision. We cannot inspect.getsource
|
| 707 |
+
# because it is not reliable when using IPython's magic "%run".
|
| 708 |
+
func_code, source_file, first_line = self.func_code_info
|
| 709 |
+
try:
|
| 710 |
+
old_func_code, old_first_line = extract_first_line(
|
| 711 |
+
self.store_backend.get_cached_func_code([self.func_id])
|
| 712 |
+
)
|
| 713 |
+
except (IOError, OSError): # some backend can also raise OSError
|
| 714 |
+
self._write_func_code(func_code, first_line)
|
| 715 |
+
return False
|
| 716 |
+
if old_func_code == func_code:
|
| 717 |
+
return True
|
| 718 |
+
|
| 719 |
+
# We have differing code, is this because we are referring to
|
| 720 |
+
# different functions, or because the function we are referring to has
|
| 721 |
+
# changed?
|
| 722 |
+
|
| 723 |
+
_, func_name = get_func_name(
|
| 724 |
+
self.func, resolv_alias=False, win_characters=False
|
| 725 |
+
)
|
| 726 |
+
if old_first_line == first_line == -1 or func_name == "<lambda>":
|
| 727 |
+
if not first_line == -1:
|
| 728 |
+
func_description = "{0} ({1}:{2})".format(
|
| 729 |
+
func_name, source_file, first_line
|
| 730 |
+
)
|
| 731 |
+
else:
|
| 732 |
+
func_description = func_name
|
| 733 |
+
warnings.warn(
|
| 734 |
+
JobLibCollisionWarning(
|
| 735 |
+
"Cannot detect name collisions for function '{0}'".format(
|
| 736 |
+
func_description
|
| 737 |
+
)
|
| 738 |
+
),
|
| 739 |
+
stacklevel=stacklevel,
|
| 740 |
+
)
|
| 741 |
+
|
| 742 |
+
# Fetch the code at the old location and compare it. If it is the
|
| 743 |
+
# same than the code store, we have a collision: the code in the
|
| 744 |
+
# file has not changed, but the name we have is pointing to a new
|
| 745 |
+
# code block.
|
| 746 |
+
if not old_first_line == first_line and source_file is not None:
|
| 747 |
+
if os.path.exists(source_file):
|
| 748 |
+
_, func_name = get_func_name(self.func, resolv_alias=False)
|
| 749 |
+
num_lines = len(func_code.split("\n"))
|
| 750 |
+
with tokenize.open(source_file) as f:
|
| 751 |
+
on_disk_func_code = f.readlines()[
|
| 752 |
+
old_first_line - 1 : old_first_line - 1 + num_lines - 1
|
| 753 |
+
]
|
| 754 |
+
on_disk_func_code = "".join(on_disk_func_code)
|
| 755 |
+
possible_collision = (
|
| 756 |
+
on_disk_func_code.rstrip() == old_func_code.rstrip()
|
| 757 |
+
)
|
| 758 |
+
else:
|
| 759 |
+
possible_collision = source_file.startswith("<doctest ")
|
| 760 |
+
if possible_collision:
|
| 761 |
+
warnings.warn(
|
| 762 |
+
JobLibCollisionWarning(
|
| 763 |
+
"Possible name collisions between functions "
|
| 764 |
+
"'%s' (%s:%i) and '%s' (%s:%i)"
|
| 765 |
+
% (
|
| 766 |
+
func_name,
|
| 767 |
+
source_file,
|
| 768 |
+
old_first_line,
|
| 769 |
+
func_name,
|
| 770 |
+
source_file,
|
| 771 |
+
first_line,
|
| 772 |
+
)
|
| 773 |
+
),
|
| 774 |
+
stacklevel=stacklevel,
|
| 775 |
+
)
|
| 776 |
+
|
| 777 |
+
# The function has changed, wipe the cache directory.
|
| 778 |
+
# XXX: Should be using warnings, and giving stacklevel
|
| 779 |
+
if self._verbose > 10:
|
| 780 |
+
_, func_name = get_func_name(self.func, resolv_alias=False)
|
| 781 |
+
self.warn(
|
| 782 |
+
"Function {0} (identified by {1}) has changed.".format(
|
| 783 |
+
func_name, self.func_id
|
| 784 |
+
)
|
| 785 |
+
)
|
| 786 |
+
self.clear(warn=True)
|
| 787 |
+
return False
|
| 788 |
+
|
| 789 |
+
def clear(self, warn=True):
|
| 790 |
+
"""Empty the function's cache."""
|
| 791 |
+
func_id = self.func_id
|
| 792 |
+
if self._verbose > 0 and warn:
|
| 793 |
+
self.warn("Clearing function cache identified by %s" % func_id)
|
| 794 |
+
self.store_backend.clear_path(
|
| 795 |
+
[
|
| 796 |
+
func_id,
|
| 797 |
+
]
|
| 798 |
+
)
|
| 799 |
+
|
| 800 |
+
func_code, _, first_line = self.func_code_info
|
| 801 |
+
self._write_func_code(func_code, first_line)
|
| 802 |
+
|
| 803 |
+
def call(self, *args, **kwargs):
|
| 804 |
+
"""Force the execution of the function with the given arguments.
|
| 805 |
+
|
| 806 |
+
The output values will be persisted, i.e., the cache will be updated
|
| 807 |
+
with any new values.
|
| 808 |
+
|
| 809 |
+
Parameters
|
| 810 |
+
----------
|
| 811 |
+
*args: arguments
|
| 812 |
+
The arguments.
|
| 813 |
+
**kwargs: keyword arguments
|
| 814 |
+
Keyword arguments.
|
| 815 |
+
|
| 816 |
+
Returns
|
| 817 |
+
-------
|
| 818 |
+
output : object
|
| 819 |
+
The output of the function call.
|
| 820 |
+
metadata : dict
|
| 821 |
+
The metadata associated with the call.
|
| 822 |
+
"""
|
| 823 |
+
call_id = (self.func_id, self._get_args_id(*args, **kwargs))
|
| 824 |
+
|
| 825 |
+
# Return the output and the metadata
|
| 826 |
+
return self._call(call_id, args, kwargs)
|
| 827 |
+
|
| 828 |
+
def _call(self, call_id, args, kwargs, shelving=False):
|
| 829 |
+
# Return the output and the metadata
|
| 830 |
+
self._before_call(args, kwargs)
|
| 831 |
+
start_time = time.time()
|
| 832 |
+
output = self.func(*args, **kwargs)
|
| 833 |
+
return self._after_call(call_id, args, kwargs, shelving, output, start_time)
|
| 834 |
+
|
| 835 |
+
def _before_call(self, args, kwargs):
|
| 836 |
+
if self._verbose > 0:
|
| 837 |
+
print(format_call(self.func, args, kwargs))
|
| 838 |
+
|
| 839 |
+
def _after_call(self, call_id, args, kwargs, shelving, output, start_time):
|
| 840 |
+
self.store_backend.dump_item(call_id, output, verbose=self._verbose)
|
| 841 |
+
duration = time.time() - start_time
|
| 842 |
+
if self._verbose > 0:
|
| 843 |
+
self._print_duration(duration)
|
| 844 |
+
metadata = self._persist_input(duration, call_id, args, kwargs)
|
| 845 |
+
if shelving:
|
| 846 |
+
return self._get_memorized_result(call_id, metadata), metadata
|
| 847 |
+
|
| 848 |
+
if self.mmap_mode is not None:
|
| 849 |
+
# Memmap the output at the first call to be consistent with
|
| 850 |
+
# later calls
|
| 851 |
+
output = self._load_item(call_id, metadata)
|
| 852 |
+
return output, metadata
|
| 853 |
+
|
| 854 |
+
def _persist_input(self, duration, call_id, args, kwargs, this_duration_limit=0.5):
|
| 855 |
+
"""Save a small summary of the call using json format in the
|
| 856 |
+
output directory.
|
| 857 |
+
|
| 858 |
+
output_dir: string
|
| 859 |
+
directory where to write metadata.
|
| 860 |
+
|
| 861 |
+
duration: float
|
| 862 |
+
time taken by hashing input arguments, calling the wrapped
|
| 863 |
+
function and persisting its output.
|
| 864 |
+
|
| 865 |
+
args, kwargs: list and dict
|
| 866 |
+
input arguments for wrapped function
|
| 867 |
+
|
| 868 |
+
this_duration_limit: float
|
| 869 |
+
Max execution time for this function before issuing a warning.
|
| 870 |
+
"""
|
| 871 |
+
start_time = time.time()
|
| 872 |
+
argument_dict = filter_args(self.func, self.ignore, args, kwargs)
|
| 873 |
+
|
| 874 |
+
input_repr = dict((k, repr(v)) for k, v in argument_dict.items())
|
| 875 |
+
# This can fail due to race-conditions with multiple
|
| 876 |
+
# concurrent joblibs removing the file or the directory
|
| 877 |
+
metadata = {
|
| 878 |
+
"duration": duration,
|
| 879 |
+
"input_args": input_repr,
|
| 880 |
+
"time": start_time,
|
| 881 |
+
}
|
| 882 |
+
|
| 883 |
+
self.store_backend.store_metadata(call_id, metadata)
|
| 884 |
+
|
| 885 |
+
this_duration = time.time() - start_time
|
| 886 |
+
if this_duration > this_duration_limit:
|
| 887 |
+
# This persistence should be fast. It will not be if repr() takes
|
| 888 |
+
# time and its output is large, because json.dump will have to
|
| 889 |
+
# write a large file. This should not be an issue with numpy arrays
|
| 890 |
+
# for which repr() always output a short representation, but can
|
| 891 |
+
# be with complex dictionaries. Fixing the problem should be a
|
| 892 |
+
# matter of replacing repr() above by something smarter.
|
| 893 |
+
warnings.warn(
|
| 894 |
+
"Persisting input arguments took %.2fs to run."
|
| 895 |
+
"If this happens often in your code, it can cause "
|
| 896 |
+
"performance problems "
|
| 897 |
+
"(results will be correct in all cases). "
|
| 898 |
+
"The reason for this is probably some large input "
|
| 899 |
+
"arguments for a wrapped function." % this_duration,
|
| 900 |
+
stacklevel=5,
|
| 901 |
+
)
|
| 902 |
+
return metadata
|
| 903 |
+
|
| 904 |
+
def _get_memorized_result(self, call_id, metadata=None):
|
| 905 |
+
return MemorizedResult(
|
| 906 |
+
self.store_backend,
|
| 907 |
+
call_id,
|
| 908 |
+
metadata=metadata,
|
| 909 |
+
timestamp=self.timestamp,
|
| 910 |
+
verbose=self._verbose - 1,
|
| 911 |
+
)
|
| 912 |
+
|
| 913 |
+
def _load_item(self, call_id, metadata=None):
|
| 914 |
+
return self.store_backend.load_item(
|
| 915 |
+
call_id, metadata=metadata, timestamp=self.timestamp, verbose=self._verbose
|
| 916 |
+
)
|
| 917 |
+
|
| 918 |
+
def _print_duration(self, duration, context=""):
|
| 919 |
+
_, name = get_func_name(self.func)
|
| 920 |
+
msg = f"{name} {context}- {format_time(duration)}"
|
| 921 |
+
print(max(0, (80 - len(msg))) * "_" + msg)
|
| 922 |
+
|
| 923 |
+
# ------------------------------------------------------------------------
|
| 924 |
+
# Private `object` interface
|
| 925 |
+
# ------------------------------------------------------------------------
|
| 926 |
+
|
| 927 |
+
def __repr__(self):
|
| 928 |
+
return "{class_name}(func={func}, location={location})".format(
|
| 929 |
+
class_name=self.__class__.__name__,
|
| 930 |
+
func=self.func,
|
| 931 |
+
location=self.store_backend.location,
|
| 932 |
+
)
|
| 933 |
+
|
| 934 |
+
|
| 935 |
+
###############################################################################
|
| 936 |
+
# class `AsyncMemorizedFunc`
|
| 937 |
+
###############################################################################
|
| 938 |
+
class AsyncMemorizedFunc(MemorizedFunc):
|
| 939 |
+
async def __call__(self, *args, **kwargs):
|
| 940 |
+
out = self._cached_call(args, kwargs, shelving=False)
|
| 941 |
+
out = await out if asyncio.iscoroutine(out) else out
|
| 942 |
+
return out[0] # Don't return metadata
|
| 943 |
+
|
| 944 |
+
async def call_and_shelve(self, *args, **kwargs):
|
| 945 |
+
out = self._cached_call(args, kwargs, shelving=True)
|
| 946 |
+
out = await out if asyncio.iscoroutine(out) else out
|
| 947 |
+
return out[0] # Don't return metadata
|
| 948 |
+
|
| 949 |
+
async def call(self, *args, **kwargs):
|
| 950 |
+
out = super().call(*args, **kwargs)
|
| 951 |
+
return await out if asyncio.iscoroutine(out) else out
|
| 952 |
+
|
| 953 |
+
async def _call(self, call_id, args, kwargs, shelving=False):
|
| 954 |
+
self._before_call(args, kwargs)
|
| 955 |
+
start_time = time.time()
|
| 956 |
+
output = await self.func(*args, **kwargs)
|
| 957 |
+
return self._after_call(call_id, args, kwargs, shelving, output, start_time)
|
| 958 |
+
|
| 959 |
+
|
| 960 |
+
###############################################################################
|
| 961 |
+
# class `Memory`
|
| 962 |
+
###############################################################################
|
| 963 |
+
class Memory(Logger):
|
| 964 |
+
"""A context object for caching a function's return value each time it
|
| 965 |
+
is called with the same input arguments.
|
| 966 |
+
|
| 967 |
+
All values are cached on the filesystem, in a deep directory
|
| 968 |
+
structure.
|
| 969 |
+
|
| 970 |
+
Read more in the :ref:`User Guide <memory>`.
|
| 971 |
+
|
| 972 |
+
Parameters
|
| 973 |
+
----------
|
| 974 |
+
location: str, pathlib.Path or None
|
| 975 |
+
The path of the base directory to use as a data store
|
| 976 |
+
or None. If None is given, no caching is done and
|
| 977 |
+
the Memory object is completely transparent. This option
|
| 978 |
+
replaces cachedir since version 0.12.
|
| 979 |
+
|
| 980 |
+
backend: str, optional, default='local'
|
| 981 |
+
Type of store backend for reading/writing cache files.
|
| 982 |
+
The 'local' backend is using regular filesystem operations to
|
| 983 |
+
manipulate data (open, mv, etc) in the backend.
|
| 984 |
+
|
| 985 |
+
mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
|
| 986 |
+
The memmapping mode used when loading from cache
|
| 987 |
+
numpy arrays. See numpy.load for the meaning of the
|
| 988 |
+
arguments.
|
| 989 |
+
|
| 990 |
+
compress: boolean, or integer, optional
|
| 991 |
+
Whether to zip the stored data on disk. If an integer is
|
| 992 |
+
given, it should be between 1 and 9, and sets the amount
|
| 993 |
+
of compression. Note that compressed arrays cannot be
|
| 994 |
+
read by memmapping.
|
| 995 |
+
|
| 996 |
+
verbose: int, optional
|
| 997 |
+
Verbosity flag, controls the debug messages that are issued
|
| 998 |
+
as functions are evaluated.
|
| 999 |
+
|
| 1000 |
+
backend_options: dict, optional
|
| 1001 |
+
Contains a dictionary of named parameters used to configure
|
| 1002 |
+
the store backend.
|
| 1003 |
+
"""
|
| 1004 |
+
|
| 1005 |
+
# ------------------------------------------------------------------------
|
| 1006 |
+
# Public interface
|
| 1007 |
+
# ------------------------------------------------------------------------
|
| 1008 |
+
|
| 1009 |
+
def __init__(
|
| 1010 |
+
self,
|
| 1011 |
+
location=None,
|
| 1012 |
+
backend="local",
|
| 1013 |
+
mmap_mode=None,
|
| 1014 |
+
compress=False,
|
| 1015 |
+
verbose=1,
|
| 1016 |
+
backend_options=None,
|
| 1017 |
+
):
|
| 1018 |
+
Logger.__init__(self)
|
| 1019 |
+
self._verbose = verbose
|
| 1020 |
+
self.mmap_mode = mmap_mode
|
| 1021 |
+
self.timestamp = time.time()
|
| 1022 |
+
self.backend = backend
|
| 1023 |
+
self.compress = compress
|
| 1024 |
+
if backend_options is None:
|
| 1025 |
+
backend_options = {}
|
| 1026 |
+
self.backend_options = backend_options
|
| 1027 |
+
|
| 1028 |
+
if compress and mmap_mode is not None:
|
| 1029 |
+
warnings.warn("Compressed results cannot be memmapped", stacklevel=2)
|
| 1030 |
+
|
| 1031 |
+
self.location = location
|
| 1032 |
+
if isinstance(location, str):
|
| 1033 |
+
location = os.path.join(location, "joblib")
|
| 1034 |
+
|
| 1035 |
+
self.store_backend = _store_backend_factory(
|
| 1036 |
+
backend,
|
| 1037 |
+
location,
|
| 1038 |
+
verbose=self._verbose,
|
| 1039 |
+
backend_options=dict(
|
| 1040 |
+
compress=compress, mmap_mode=mmap_mode, **backend_options
|
| 1041 |
+
),
|
| 1042 |
+
)
|
| 1043 |
+
|
| 1044 |
+
def cache(
|
| 1045 |
+
self,
|
| 1046 |
+
func=None,
|
| 1047 |
+
ignore=None,
|
| 1048 |
+
verbose=None,
|
| 1049 |
+
mmap_mode=False,
|
| 1050 |
+
cache_validation_callback=None,
|
| 1051 |
+
):
|
| 1052 |
+
"""Decorates the given function func to only compute its return
|
| 1053 |
+
value for input arguments not cached on disk.
|
| 1054 |
+
|
| 1055 |
+
Parameters
|
| 1056 |
+
----------
|
| 1057 |
+
func: callable, optional
|
| 1058 |
+
The function to be decorated
|
| 1059 |
+
ignore: list of strings
|
| 1060 |
+
A list of arguments name to ignore in the hashing
|
| 1061 |
+
verbose: integer, optional
|
| 1062 |
+
The verbosity mode of the function. By default that
|
| 1063 |
+
of the memory object is used.
|
| 1064 |
+
mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
|
| 1065 |
+
The memmapping mode used when loading from cache
|
| 1066 |
+
numpy arrays. See numpy.load for the meaning of the
|
| 1067 |
+
arguments. By default that of the memory object is used.
|
| 1068 |
+
cache_validation_callback: callable, optional
|
| 1069 |
+
Callable to validate whether or not the cache is valid. When
|
| 1070 |
+
the cached function is called with arguments for which a cache
|
| 1071 |
+
exists, this callable is called with the metadata of the cached
|
| 1072 |
+
result as its sole argument. If it returns True, then the
|
| 1073 |
+
cached result is returned, else the cache for these arguments
|
| 1074 |
+
is cleared and recomputed.
|
| 1075 |
+
|
| 1076 |
+
Returns
|
| 1077 |
+
-------
|
| 1078 |
+
decorated_func: MemorizedFunc object
|
| 1079 |
+
The returned object is a MemorizedFunc object, that is
|
| 1080 |
+
callable (behaves like a function), but offers extra
|
| 1081 |
+
methods for cache lookup and management. See the
|
| 1082 |
+
documentation for :class:`joblib.memory.MemorizedFunc`.
|
| 1083 |
+
"""
|
| 1084 |
+
if cache_validation_callback is not None and not callable(
|
| 1085 |
+
cache_validation_callback
|
| 1086 |
+
):
|
| 1087 |
+
raise ValueError(
|
| 1088 |
+
"cache_validation_callback needs to be callable. "
|
| 1089 |
+
f"Got {cache_validation_callback}."
|
| 1090 |
+
)
|
| 1091 |
+
if func is None:
|
| 1092 |
+
# Partial application, to be able to specify extra keyword
|
| 1093 |
+
# arguments in decorators
|
| 1094 |
+
return functools.partial(
|
| 1095 |
+
self.cache,
|
| 1096 |
+
ignore=ignore,
|
| 1097 |
+
mmap_mode=mmap_mode,
|
| 1098 |
+
verbose=verbose,
|
| 1099 |
+
cache_validation_callback=cache_validation_callback,
|
| 1100 |
+
)
|
| 1101 |
+
if self.store_backend is None:
|
| 1102 |
+
cls = (
|
| 1103 |
+
AsyncNotMemorizedFunc
|
| 1104 |
+
if inspect.iscoroutinefunction(func)
|
| 1105 |
+
else NotMemorizedFunc
|
| 1106 |
+
)
|
| 1107 |
+
return cls(func)
|
| 1108 |
+
if verbose is None:
|
| 1109 |
+
verbose = self._verbose
|
| 1110 |
+
if mmap_mode is False:
|
| 1111 |
+
mmap_mode = self.mmap_mode
|
| 1112 |
+
if isinstance(func, MemorizedFunc):
|
| 1113 |
+
func = func.func
|
| 1114 |
+
cls = AsyncMemorizedFunc if inspect.iscoroutinefunction(func) else MemorizedFunc
|
| 1115 |
+
return cls(
|
| 1116 |
+
func,
|
| 1117 |
+
location=self.store_backend,
|
| 1118 |
+
backend=self.backend,
|
| 1119 |
+
ignore=ignore,
|
| 1120 |
+
mmap_mode=mmap_mode,
|
| 1121 |
+
compress=self.compress,
|
| 1122 |
+
verbose=verbose,
|
| 1123 |
+
timestamp=self.timestamp,
|
| 1124 |
+
cache_validation_callback=cache_validation_callback,
|
| 1125 |
+
)
|
| 1126 |
+
|
| 1127 |
+
def clear(self, warn=True):
|
| 1128 |
+
"""Erase the complete cache directory."""
|
| 1129 |
+
if warn:
|
| 1130 |
+
self.warn("Flushing completely the cache")
|
| 1131 |
+
if self.store_backend is not None:
|
| 1132 |
+
self.store_backend.clear()
|
| 1133 |
+
|
| 1134 |
+
# As the cache is completely clear, make sure the _FUNCTION_HASHES
|
| 1135 |
+
# cache is also reset. Else, for a function that is present in this
|
| 1136 |
+
# table, results cached after this clear will be have cache miss
|
| 1137 |
+
# as the function code is not re-written.
|
| 1138 |
+
_FUNCTION_HASHES.clear()
|
| 1139 |
+
|
| 1140 |
+
def reduce_size(self, bytes_limit=None, items_limit=None, age_limit=None):
|
| 1141 |
+
"""Remove cache elements to make the cache fit its limits.
|
| 1142 |
+
|
| 1143 |
+
The limitation can impose that the cache size fits in ``bytes_limit``,
|
| 1144 |
+
that the number of cache items is no more than ``items_limit``, and
|
| 1145 |
+
that all files in cache are not older than ``age_limit``.
|
| 1146 |
+
|
| 1147 |
+
Parameters
|
| 1148 |
+
----------
|
| 1149 |
+
bytes_limit: int | str, optional
|
| 1150 |
+
Limit in bytes of the size of the cache. By default, the size of
|
| 1151 |
+
the cache is unlimited. When reducing the size of the cache,
|
| 1152 |
+
``joblib`` keeps the most recently accessed items first. If a
|
| 1153 |
+
str is passed, it is converted to a number of bytes using units
|
| 1154 |
+
{ K | M | G} for kilo, mega, giga.
|
| 1155 |
+
|
| 1156 |
+
items_limit: int, optional
|
| 1157 |
+
Number of items to limit the cache to. By default, the number of
|
| 1158 |
+
items in the cache is unlimited. When reducing the size of the
|
| 1159 |
+
cache, ``joblib`` keeps the most recently accessed items first.
|
| 1160 |
+
|
| 1161 |
+
age_limit: datetime.timedelta, optional
|
| 1162 |
+
Maximum age of items to limit the cache to. When reducing the size
|
| 1163 |
+
of the cache, any items last accessed more than the given length of
|
| 1164 |
+
time ago are deleted. Example: to remove files older than 5 days,
|
| 1165 |
+
use datetime.timedelta(days=5). Negative timedelta are not
|
| 1166 |
+
accepted.
|
| 1167 |
+
"""
|
| 1168 |
+
if self.store_backend is None:
|
| 1169 |
+
# No cached results, this function does nothing.
|
| 1170 |
+
return
|
| 1171 |
+
|
| 1172 |
+
if bytes_limit is None and items_limit is None and age_limit is None:
|
| 1173 |
+
# No limitation to impose, returning
|
| 1174 |
+
return
|
| 1175 |
+
|
| 1176 |
+
# Defers the actual limits enforcing to the store backend.
|
| 1177 |
+
self.store_backend.enforce_store_limits(bytes_limit, items_limit, age_limit)
|
| 1178 |
+
|
| 1179 |
+
def eval(self, func, *args, **kwargs):
|
| 1180 |
+
"""Eval function func with arguments `*args` and `**kwargs`,
|
| 1181 |
+
in the context of the memory.
|
| 1182 |
+
|
| 1183 |
+
This method works similarly to the builtin `apply`, except
|
| 1184 |
+
that the function is called only if the cache is not
|
| 1185 |
+
up to date.
|
| 1186 |
+
|
| 1187 |
+
"""
|
| 1188 |
+
if self.store_backend is None:
|
| 1189 |
+
return func(*args, **kwargs)
|
| 1190 |
+
return self.cache(func)(*args, **kwargs)
|
| 1191 |
+
|
| 1192 |
+
# ------------------------------------------------------------------------
|
| 1193 |
+
# Private `object` interface
|
| 1194 |
+
# ------------------------------------------------------------------------
|
| 1195 |
+
|
| 1196 |
+
def __repr__(self):
|
| 1197 |
+
return "{class_name}(location={location})".format(
|
| 1198 |
+
class_name=self.__class__.__name__,
|
| 1199 |
+
location=(
|
| 1200 |
+
None if self.store_backend is None else self.store_backend.location
|
| 1201 |
+
),
|
| 1202 |
+
)
|
| 1203 |
+
|
| 1204 |
+
def __getstate__(self):
|
| 1205 |
+
"""We don't store the timestamp when pickling, to avoid the hash
|
| 1206 |
+
depending from it.
|
| 1207 |
+
"""
|
| 1208 |
+
state = self.__dict__.copy()
|
| 1209 |
+
state["timestamp"] = None
|
| 1210 |
+
return state
|
| 1211 |
+
|
| 1212 |
+
|
| 1213 |
+
###############################################################################
|
| 1214 |
+
# cache_validation_callback helpers
|
| 1215 |
+
###############################################################################
|
| 1216 |
+
|
| 1217 |
+
|
| 1218 |
+
def expires_after(
|
| 1219 |
+
days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0
|
| 1220 |
+
):
|
| 1221 |
+
"""Helper cache_validation_callback to force recompute after a duration.
|
| 1222 |
+
|
| 1223 |
+
Parameters
|
| 1224 |
+
----------
|
| 1225 |
+
days, seconds, microseconds, milliseconds, minutes, hours, weeks: numbers
|
| 1226 |
+
argument passed to a timedelta.
|
| 1227 |
+
"""
|
| 1228 |
+
delta = datetime.timedelta(
|
| 1229 |
+
days=days,
|
| 1230 |
+
seconds=seconds,
|
| 1231 |
+
microseconds=microseconds,
|
| 1232 |
+
milliseconds=milliseconds,
|
| 1233 |
+
minutes=minutes,
|
| 1234 |
+
hours=hours,
|
| 1235 |
+
weeks=weeks,
|
| 1236 |
+
)
|
| 1237 |
+
|
| 1238 |
+
def cache_validation_callback(metadata):
|
| 1239 |
+
computation_age = time.time() - metadata["time"]
|
| 1240 |
+
return computation_age < delta.total_seconds()
|
| 1241 |
+
|
| 1242 |
+
return cache_validation_callback
|