Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +3 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_codecs_kr.cpython-38-x86_64-linux-gnu.so +3 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_curses.cpython-38-x86_64-linux-gnu.so +3 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_elementtree.cpython-38-x86_64-linux-gnu.so +3 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libXau-00ec42fe.so.6.0.0 +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/__init__.py +21 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/accelerator.py +1154 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/big_modeling.py +324 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/checkpointing.py +185 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/data_loader.py +654 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/hooks.py +480 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/launchers.py +177 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/logging.py +63 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/memory_utils.py +29 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/optimizer.py +159 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/scheduler.py +89 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/state.py +262 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/tracking.py +332 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/__init__.py +39 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/codingstatemachine.py +88 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/cp949prober.py +49 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/enums.py +76 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/eucjpprober.py +92 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/euctwprober.py +46 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/gb2312prober.py +46 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/hebrewprober.py +292 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/jpcntx.py +233 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langcyrillicmodel.py +333 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langgreekmodel.py +225 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langthaimodel.py +199 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langturkishmodel.py +193 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/mbcsgroupprober.py +54 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sbcsgroupprober.py +73 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sjisprober.py +92 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/universaldetector.py +286 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/__init__.py +1 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/cli.py +141 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/conda_fmt.py +144 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/interface.py +24 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/streaming.py +42 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/utils.py +493 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/validate.py +112 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/distutils-precedence.pth +3 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/future/__init__.py +93 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/METADATA +530 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/RECORD +66 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/top_level.txt +1 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/INSTALLER +1 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/LICENSE +15 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/METADATA +245 -0
.gitattributes
CHANGED
|
@@ -240,3 +240,6 @@ my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_struct.cpyth
|
|
| 240 |
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_json.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 241 |
my_container_sandbox/workspace/anaconda3/lib/libnppig.so.11.3.3.95 filter=lfs diff=lfs merge=lfs -text
|
| 242 |
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_pickle.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_json.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 241 |
my_container_sandbox/workspace/anaconda3/lib/libnppig.so.11.3.3.95 filter=lfs diff=lfs merge=lfs -text
|
| 242 |
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_pickle.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 243 |
+
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_curses.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 244 |
+
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_elementtree.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 245 |
+
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_codecs_kr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_codecs_kr.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dd191d7e8e9abe1ef92f04ea5546ee64ced1fcdf6aa7a4beaa6f7ae3126358d
|
| 3 |
+
size 177608
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_curses.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f172437ab7f53fb597873b15ed5c41e4a63a346968a7b3b3b1ab71832c1dd2c
|
| 3 |
+
size 464208
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_elementtree.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:218bd174fff49763ef22945a55a34b57370283ab40ab6fca237345f183ea256a
|
| 3 |
+
size 347560
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libXau-00ec42fe.so.6.0.0
ADDED
|
Binary file (17 kB). View file
|
|
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# flake8: noqa
|
| 2 |
+
# There's no way to ignore "F401 '...' imported but unused" warnings in this
|
| 3 |
+
# module, but to preserve other warnings. So, don't check this module at all.
|
| 4 |
+
|
| 5 |
+
__version__ = "0.11.0"
|
| 6 |
+
|
| 7 |
+
from .accelerator import Accelerator
|
| 8 |
+
from .big_modeling import cpu_offload, disk_offload, dispatch_model, init_empty_weights, load_checkpoint_and_dispatch
|
| 9 |
+
from .launchers import debug_launcher, notebook_launcher
|
| 10 |
+
from .utils import (
|
| 11 |
+
DeepSpeedPlugin,
|
| 12 |
+
DistributedDataParallelKwargs,
|
| 13 |
+
DistributedType,
|
| 14 |
+
FullyShardedDataParallelPlugin,
|
| 15 |
+
GradScalerKwargs,
|
| 16 |
+
InitProcessGroupKwargs,
|
| 17 |
+
find_executable_batch_size,
|
| 18 |
+
infer_auto_device_map,
|
| 19 |
+
load_checkpoint_in_model,
|
| 20 |
+
synchronize_rng_states,
|
| 21 |
+
)
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/accelerator.py
ADDED
|
@@ -0,0 +1,1154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2021 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import contextlib
|
| 16 |
+
import gc
|
| 17 |
+
import math
|
| 18 |
+
import os
|
| 19 |
+
import sys
|
| 20 |
+
import warnings
|
| 21 |
+
from contextlib import contextmanager
|
| 22 |
+
from typing import List, Optional, Union
|
| 23 |
+
|
| 24 |
+
import torch
|
| 25 |
+
|
| 26 |
+
from .checkpointing import load_accelerator_state, load_custom_state, save_accelerator_state, save_custom_state
|
| 27 |
+
from .data_loader import prepare_data_loader
|
| 28 |
+
from .logging import get_logger
|
| 29 |
+
from .optimizer import AcceleratedOptimizer
|
| 30 |
+
from .scheduler import AcceleratedScheduler
|
| 31 |
+
from .state import AcceleratorState, GradientState
|
| 32 |
+
from .tracking import LOGGER_TYPE_TO_CLASS, GeneralTracker, filter_trackers
|
| 33 |
+
from .utils import (
|
| 34 |
+
DeepSpeedPlugin,
|
| 35 |
+
DistributedDataParallelKwargs,
|
| 36 |
+
DistributedType,
|
| 37 |
+
FullyShardedDataParallelPlugin,
|
| 38 |
+
GradScalerKwargs,
|
| 39 |
+
InitProcessGroupKwargs,
|
| 40 |
+
KwargsHandler,
|
| 41 |
+
LoggerType,
|
| 42 |
+
PrecisionType,
|
| 43 |
+
RNGType,
|
| 44 |
+
compare_versions,
|
| 45 |
+
convert_outputs_to_fp32,
|
| 46 |
+
extract_model_from_parallel,
|
| 47 |
+
gather,
|
| 48 |
+
get_pretty_name,
|
| 49 |
+
is_bf16_available,
|
| 50 |
+
is_deepspeed_available,
|
| 51 |
+
is_torch_version,
|
| 52 |
+
is_tpu_available,
|
| 53 |
+
pad_across_processes,
|
| 54 |
+
reduce,
|
| 55 |
+
save,
|
| 56 |
+
wait_for_everyone,
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
if is_deepspeed_available():
|
| 61 |
+
import deepspeed
|
| 62 |
+
|
| 63 |
+
from .utils import (
|
| 64 |
+
DeepSpeedEngineWrapper,
|
| 65 |
+
DeepSpeedOptimizerWrapper,
|
| 66 |
+
DeepSpeedSchedulerWrapper,
|
| 67 |
+
DummyOptim,
|
| 68 |
+
DummyScheduler,
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
if is_tpu_available(check_device=False):
|
| 72 |
+
import torch_xla.distributed.xla_multiprocessing as xmp
|
| 73 |
+
|
| 74 |
+
logger = get_logger(__name__)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class Accelerator:
|
| 78 |
+
"""
|
| 79 |
+
Creates an instance of an accelerator for distributed training (on multi-GPU, TPU) or mixed precision training.
|
| 80 |
+
|
| 81 |
+
Args:
|
| 82 |
+
device_placement (`bool`, *optional*, defaults to `True`):
|
| 83 |
+
Whether or not the accelerator should put objects on device (tensors yielded by the dataloader, model,
|
| 84 |
+
etc...).
|
| 85 |
+
split_batches (`bool`, *optional*, defaults to `False`):
|
| 86 |
+
Whether or not the accelerator should split the batches yielded by the dataloaders across the devices. If
|
| 87 |
+
`True` the actual batch size used will be the same on any kind of distributed processes, but it must be a
|
| 88 |
+
round multiple of the `num_processes` you are using. If `False`, actual batch size used will be the one set
|
| 89 |
+
in your script multiplied by the number of processes.
|
| 90 |
+
mixed_precision (`str`, *optional*):
|
| 91 |
+
Whether or not to use mixed precision training (fp16 or bfloat16). Choose from 'no','fp16','bf16'. Will
|
| 92 |
+
default to the value in the environment variable `MIXED_PRECISION`, which will use the default value in the
|
| 93 |
+
accelerate config of the current system or the flag passed with the `accelerate.launch` command. 'fp16'
|
| 94 |
+
requires pytorch 1.6 or higher. 'bf16' requires pytorch 1.10 or higher.
|
| 95 |
+
gradient_accumulation_steps (`int`, *optional*, default to 1):
|
| 96 |
+
The number of steps that should pass before gradients are accumulated. A number > 1 should be combined with
|
| 97 |
+
`Accelerator.accumulate`.
|
| 98 |
+
cpu (`bool`, *optional*):
|
| 99 |
+
Whether or not to force the script to execute on CPU. Will ignore GPU available if set to `True` and force
|
| 100 |
+
the execution on one process only.
|
| 101 |
+
deepspeed_plugin (`DeepSpeedPlugin`, *optional*):
|
| 102 |
+
Tweak your DeepSpeed related args using this argument. This argument is optional and can be configured
|
| 103 |
+
directly using *accelerate config*
|
| 104 |
+
fsdp_plugin (`FullyShardedDataParallelPlugin`, *optional*):
|
| 105 |
+
Tweak your FSDP related args using this argument. This argument is optional and can be configured directly
|
| 106 |
+
using *accelerate config*
|
| 107 |
+
rng_types (list of `str` or [`~utils.RNGType`]):
|
| 108 |
+
The list of random number generators to synchronize at the beginning of each iteration in your prepared
|
| 109 |
+
dataloaders. Should be one or several of:
|
| 110 |
+
|
| 111 |
+
- `"torch"`: the base torch random number generator
|
| 112 |
+
- `"cuda"`: the CUDA random number generator (GPU only)
|
| 113 |
+
- `"xla"`: the XLA random number generator (TPU only)
|
| 114 |
+
- `"generator"`: the `torch.Generator` of the sampler (or batch sampler if there is no sampler in your
|
| 115 |
+
dataloader) or of the iterable dataset (if it exists) if the underlying dataset is of that type.
|
| 116 |
+
|
| 117 |
+
Will default to `["torch"]` for PyTorch versions <=1.5.1 and `["generator"]` for PyTorch versions >= 1.6.
|
| 118 |
+
log_with (list of `str`, [`~utils.LoggerType`] or [`~tracking.GeneralTracker`], *optional*):
|
| 119 |
+
A list of loggers to be setup for experiment tracking. Should be one or several of:
|
| 120 |
+
|
| 121 |
+
- `"all"`
|
| 122 |
+
- `"tensorboard"`
|
| 123 |
+
- `"wandb"`
|
| 124 |
+
- `"comet_ml"`
|
| 125 |
+
If `"all`" is selected, will pick up all available trackers in the environment and intialize them. Can also
|
| 126 |
+
accept implementations of `GeneralTracker` for custom trackers, and can be combined with `"all"`.
|
| 127 |
+
logging_dir (`str`, `os.PathLike`, *optional*):
|
| 128 |
+
A path to a directory for storing logs of locally-compatible loggers.
|
| 129 |
+
dispatch_batches (`bool`, *optional*):
|
| 130 |
+
If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process
|
| 131 |
+
and then the batches are split and broadcast to each process. Will default to `True` for `DataLoader` whose
|
| 132 |
+
underlying dataset is an `IterableDataset`, `False` otherwise.
|
| 133 |
+
step_scheduler_with_optimizer (`bool`, *optional`, defaults to `True`):
|
| 134 |
+
Set `True` if the learning rate scheduler is stepped at the same time as the optimizer, `False` if only
|
| 135 |
+
done under certain circumstances (at the end of each epoch, for instance).
|
| 136 |
+
kwargs_handlers (`List[KwargHandler]`, *optional*)
|
| 137 |
+
A list of `KwargHandler` to customize how the objects related to distributed training or mixed precision
|
| 138 |
+
are created. See [kwargs](kwargs) for more information.
|
| 139 |
+
|
| 140 |
+
Attributes
|
| 141 |
+
|
| 142 |
+
- **device** (`torch.device`) -- The device to use.
|
| 143 |
+
- **state** ([`~state.AcceleratorState`]) -- The distributed setup state.
|
| 144 |
+
"""
|
| 145 |
+
|
| 146 |
+
def __init__(
|
| 147 |
+
self,
|
| 148 |
+
device_placement: bool = True,
|
| 149 |
+
split_batches: bool = False,
|
| 150 |
+
fp16: bool = None,
|
| 151 |
+
mixed_precision: Union[PrecisionType, str] = None,
|
| 152 |
+
gradient_accumulation_steps: int = 1,
|
| 153 |
+
cpu: bool = False,
|
| 154 |
+
deepspeed_plugin: DeepSpeedPlugin = None,
|
| 155 |
+
fsdp_plugin: FullyShardedDataParallelPlugin = None,
|
| 156 |
+
rng_types: Optional[List[Union[str, RNGType]]] = None,
|
| 157 |
+
log_with: Optional[List[Union[str, LoggerType, GeneralTracker]]] = None,
|
| 158 |
+
logging_dir: Optional[Union[str, os.PathLike]] = None,
|
| 159 |
+
dispatch_batches: Optional[bool] = None,
|
| 160 |
+
step_scheduler_with_optimizer: bool = True,
|
| 161 |
+
kwargs_handlers: Optional[List[KwargsHandler]] = None,
|
| 162 |
+
):
|
| 163 |
+
self.logging_dir = logging_dir
|
| 164 |
+
trackers = filter_trackers(log_with, self.logging_dir)
|
| 165 |
+
if len(trackers) < 1 and log_with is not None:
|
| 166 |
+
warnings.warn(f"`log_with={log_with}` was passed but no supported trackers are currently installed.")
|
| 167 |
+
self.log_with = trackers
|
| 168 |
+
|
| 169 |
+
if mixed_precision is not None:
|
| 170 |
+
mixed_precision = str(mixed_precision)
|
| 171 |
+
if mixed_precision not in PrecisionType:
|
| 172 |
+
raise ValueError(
|
| 173 |
+
f"Unknown mixed_precision mode: {mixed_precision}. Choose between {PrecisionType.list()}"
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
if fp16:
|
| 177 |
+
warnings.warn('fp16=True is deprecated. Use mixed_precision="fp16" instead.', DeprecationWarning)
|
| 178 |
+
mixed_precision = "fp16"
|
| 179 |
+
|
| 180 |
+
if deepspeed_plugin is None: # init from env variables
|
| 181 |
+
deepspeed_plugin = DeepSpeedPlugin() if os.environ.get("USE_DEEPSPEED", "false") == "true" else None
|
| 182 |
+
else:
|
| 183 |
+
assert isinstance(
|
| 184 |
+
deepspeed_plugin, DeepSpeedPlugin
|
| 185 |
+
), "`deepspeed_plugin` must be a DeepSpeedPlugin object."
|
| 186 |
+
os.environ["USE_DEEPSPEED"] = "true" # use DeepSpeed if plugin is provided
|
| 187 |
+
if deepspeed_plugin:
|
| 188 |
+
if not is_deepspeed_available():
|
| 189 |
+
raise ImportError("DeepSpeed is not installed => run `pip install deepspeed` or build it from source.")
|
| 190 |
+
if compare_versions("deepspeed", "<", "0.6.5"):
|
| 191 |
+
raise ImportError("DeepSpeed version must be >= 0.6.5. Please update DeepSpeed.")
|
| 192 |
+
|
| 193 |
+
mixed_precision = os.environ.get("MIXED_PRECISION", "no") if mixed_precision is None else mixed_precision
|
| 194 |
+
deepspeed_plugin.set_mixed_precision(mixed_precision)
|
| 195 |
+
deepspeed_plugin.set_deepspeed_weakref()
|
| 196 |
+
|
| 197 |
+
if os.environ.get("USE_FSDP", "false") == "true" or isinstance(fsdp_plugin, FullyShardedDataParallelPlugin):
|
| 198 |
+
if is_torch_version("<", "1.12.0"):
|
| 199 |
+
raise ValueError("FSDP requires PyTorch >= 1.12.0")
|
| 200 |
+
|
| 201 |
+
if fsdp_plugin is None: # init from env variables
|
| 202 |
+
fsdp_plugin = FullyShardedDataParallelPlugin() if os.environ.get("USE_FSDP", "false") == "true" else None
|
| 203 |
+
else:
|
| 204 |
+
if not isinstance(fsdp_plugin, FullyShardedDataParallelPlugin):
|
| 205 |
+
raise TypeError("`fsdp_plugin` must be a FullyShardedDataParallelPlugin object.")
|
| 206 |
+
os.environ["USE_FSDP"] = "true" # use FSDP if plugin is provided
|
| 207 |
+
|
| 208 |
+
# Kwargs handlers
|
| 209 |
+
self.ddp_handler = None
|
| 210 |
+
self.scaler_handler = None
|
| 211 |
+
self.init_handler = None
|
| 212 |
+
if kwargs_handlers is not None:
|
| 213 |
+
for handler in kwargs_handlers:
|
| 214 |
+
assert isinstance(handler, KwargsHandler), f"Unsupported kwargs handler passed: {handler}."
|
| 215 |
+
if isinstance(handler, DistributedDataParallelKwargs):
|
| 216 |
+
if self.ddp_handler is not None:
|
| 217 |
+
raise ValueError("You can only pass one `DistributedDataParallelKwargs` in `kwargs_handler`.")
|
| 218 |
+
else:
|
| 219 |
+
self.ddp_handler = handler
|
| 220 |
+
elif isinstance(handler, GradScalerKwargs):
|
| 221 |
+
if self.scaler_handler is not None:
|
| 222 |
+
raise ValueError("You can only pass one `GradScalerKwargs` in `kwargs_handler`.")
|
| 223 |
+
else:
|
| 224 |
+
self.scaler_handler = handler
|
| 225 |
+
elif isinstance(handler, InitProcessGroupKwargs):
|
| 226 |
+
if self.init_handler is not None:
|
| 227 |
+
raise ValueError("You can only pass one `InitProcessGroupKwargs` in `kwargs_handler`.")
|
| 228 |
+
else:
|
| 229 |
+
self.init_handler = handler
|
| 230 |
+
|
| 231 |
+
kwargs = self.init_handler.to_kwargs() if self.init_handler is not None else {}
|
| 232 |
+
self.state = AcceleratorState(
|
| 233 |
+
mixed_precision=mixed_precision,
|
| 234 |
+
cpu=cpu,
|
| 235 |
+
deepspeed_plugin=deepspeed_plugin,
|
| 236 |
+
fsdp_plugin=fsdp_plugin,
|
| 237 |
+
_from_accelerator=True,
|
| 238 |
+
**kwargs,
|
| 239 |
+
)
|
| 240 |
+
|
| 241 |
+
if gradient_accumulation_steps > 1:
|
| 242 |
+
if self.state.distributed_type == DistributedType.TPU:
|
| 243 |
+
raise NotImplementedError(
|
| 244 |
+
"Gradient accumulation on TPU is not supported. Pass in `gradient_accumulation_steps=1`"
|
| 245 |
+
)
|
| 246 |
+
|
| 247 |
+
self.gradient_accumulation_steps = gradient_accumulation_steps
|
| 248 |
+
self.device_placement = device_placement
|
| 249 |
+
self.split_batches = split_batches
|
| 250 |
+
self.dispatch_batches = dispatch_batches
|
| 251 |
+
if dispatch_batches is True and is_torch_version("<", "1.8.0"):
|
| 252 |
+
raise ImportError(
|
| 253 |
+
"Using `DataLoaderDispatcher` requires PyTorch 1.8.0 minimum. You have {torch.__version__}."
|
| 254 |
+
)
|
| 255 |
+
self.step_scheduler_with_optimizer = step_scheduler_with_optimizer
|
| 256 |
+
|
| 257 |
+
# Mixed precision attributes
|
| 258 |
+
self.scaler = None
|
| 259 |
+
self.native_amp = False
|
| 260 |
+
err = "{mode} mixed precision requires {requirement}"
|
| 261 |
+
if self.state.mixed_precision == "fp16":
|
| 262 |
+
self.native_amp = is_torch_version(">=", "1.6")
|
| 263 |
+
if not self.native_amp:
|
| 264 |
+
raise ValueError(err.format(mode="fp16", requirement="PyTorch >= 1.6"))
|
| 265 |
+
if not torch.cuda.is_available():
|
| 266 |
+
raise ValueError(err.format(mode="fp16", requirement="a GPU"))
|
| 267 |
+
kwargs = self.scaler_handler.to_kwargs() if self.scaler_handler is not None else {}
|
| 268 |
+
if self.distributed_type == DistributedType.FSDP:
|
| 269 |
+
from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler
|
| 270 |
+
|
| 271 |
+
self.scaler = ShardedGradScaler(**kwargs)
|
| 272 |
+
else:
|
| 273 |
+
self.scaler = torch.cuda.amp.GradScaler(**kwargs)
|
| 274 |
+
elif self.state.mixed_precision == "bf16" and self.distributed_type != DistributedType.FSDP:
|
| 275 |
+
self.native_amp = is_bf16_available(True)
|
| 276 |
+
if mixed_precision == "bf16" and not self.native_amp and not is_tpu_available():
|
| 277 |
+
raise ValueError(err.format(mode="bf16", requirement="PyTorch >= 1.10 and a supported device."))
|
| 278 |
+
|
| 279 |
+
# Only on the GPU do we care about scaling the gradients
|
| 280 |
+
if torch.cuda.is_available():
|
| 281 |
+
kwargs = self.scaler_handler.to_kwargs() if self.scaler_handler is not None else {}
|
| 282 |
+
self.scaler = torch.cuda.amp.GradScaler(**kwargs)
|
| 283 |
+
|
| 284 |
+
# Start of internal step tracking
|
| 285 |
+
self.step = 0
|
| 286 |
+
self.gradient_state = GradientState()
|
| 287 |
+
|
| 288 |
+
# Internal references to the training objects
|
| 289 |
+
self._optimizers = []
|
| 290 |
+
self._models = []
|
| 291 |
+
self._schedulers = []
|
| 292 |
+
self._custom_objects = []
|
| 293 |
+
|
| 294 |
+
# RNG Types
|
| 295 |
+
self.rng_types = rng_types
|
| 296 |
+
if self.rng_types is None:
|
| 297 |
+
self.rng_types = ["torch"] if is_torch_version("<=", "1.5.1") else ["generator"]
|
| 298 |
+
|
| 299 |
+
@property
|
| 300 |
+
def use_distributed(self):
|
| 301 |
+
return self.distributed_type != DistributedType.NO and self.num_processes > 1
|
| 302 |
+
|
| 303 |
+
@property
|
| 304 |
+
def distributed_type(self):
|
| 305 |
+
return self.state.distributed_type
|
| 306 |
+
|
| 307 |
+
@property
|
| 308 |
+
def num_processes(self):
|
| 309 |
+
return self.state.num_processes
|
| 310 |
+
|
| 311 |
+
@property
def process_index(self):
    """Global rank of the current process."""
    state = self.state
    return state.process_index
|
| 314 |
+
|
| 315 |
+
@property
def local_process_index(self):
    """Rank of the current process on its own machine."""
    state = self.state
    return state.local_process_index
|
| 318 |
+
|
| 319 |
+
@property
def device(self):
    """Device this process places tensors on, as chosen by the shared state."""
    state = self.state
    return state.device
|
| 322 |
+
|
| 323 |
+
@property
def is_main_process(self):
    """True for one process only."""
    # Global rank zero is the designated main process.
    rank = self.process_index
    return rank == 0
|
| 327 |
+
|
| 328 |
+
@property
def is_local_main_process(self):
    """True for one process per server."""
    # Local rank zero is the designated main process on each machine.
    local_rank = self.local_process_index
    return local_rank == 0
|
| 332 |
+
|
| 333 |
+
@property
def use_fp16(self):
    """Whether any mixed-precision mode (fp16 or bf16) is active."""
    mode = self.mixed_precision
    return mode != "no"
|
| 336 |
+
|
| 337 |
+
@property
def mixed_precision(self):
    """Active mixed-precision mode: `"fp16"`, `"bf16"` or `"no"`."""
    if self.distributed_type != DistributedType.DEEPSPEED:
        return self.state.mixed_precision
    # DeepSpeed owns the precision settings, so read them from its config instead
    # of the accelerator state. fp16 takes precedence over bf16, matching the
    # original branch order.
    ds_config = self.state.deepspeed_plugin.deepspeed_config
    for mode in ("fp16", "bf16"):
        if ds_config.get(mode, {}).get("enabled", False):
            return mode
    return "no"
|
| 350 |
+
|
| 351 |
+
@contextmanager
def local_main_process_first(self):
    """
    Lets the local main process go inside a with block.

    The other processes will enter the with block after the main process exits.
    """
    # `yield from` (rather than a plain loop) keeps generator delegation intact, so
    # exceptions raised inside the `with` body are thrown into `_goes_first`.
    yield from self._goes_first(self.is_local_main_process)
|
| 359 |
+
|
| 360 |
+
@contextmanager
def main_process_first(self):
    """
    Lets the main process go first inside a with block.

    The other processes will enter the with block after the main process exits.
    """
    # Same delegation pattern as `local_main_process_first`, keyed on the global
    # main process instead of the per-machine one.
    yield from self._goes_first(self.is_main_process)
|
| 368 |
+
|
| 369 |
+
def _goes_first(self, is_main):
    """Generator backing the `*_process_first` context managers.

    Followers block before the body; the designated process blocks after it,
    releasing everyone else into the block.
    """
    follower = not is_main
    if follower:
        self.wait_for_everyone()

    yield

    if not follower:
        self.wait_for_everyone()
|
| 377 |
+
|
| 378 |
+
@contextmanager
def no_sync(self, model):
    """
    A context manager to disable gradient synchronizations across DDP processes by calling
    `torch.nn.parallel.DistributedDataParallel.no_sync`.

    If `model` is not in DDP, this context manager does nothing

    Args:
        model (`torch.nn.Module`):
            PyTorch Module that was prepared with `Accelerator.prepare`
    """
    if self.use_distributed:
        # Fall back to a no-op when the wrapper doesn't expose `no_sync`.
        context = getattr(model, "no_sync", contextlib.nullcontext)
    else:
        context = contextlib.nullcontext
    with context():
        yield
|
| 396 |
+
|
| 397 |
+
def _do_sync(self):
    "Sets the right `sync_gradients` context and either resets or increases `self.step`"
    if self.gradient_state.end_of_dataloader:
        # A fresh pass over the dataloader always starts synchronized.
        self.step = 0
        self.gradient_state._set_sync_gradients(True)
    else:
        self.step += 1
        # Only sync every `gradient_accumulation_steps` micro-steps.
        sync_now = (self.step % self.gradient_accumulation_steps) == 0
        self.gradient_state._set_sync_gradients(sync_now)
|
| 405 |
+
|
| 406 |
+
@property
def sync_gradients(self):
    """Whether gradients will be synchronized at the current accumulation step."""
    gradient_state = self.gradient_state
    return gradient_state.sync_gradients
|
| 409 |
+
|
| 410 |
+
@contextmanager
def accumulate(self, model):
    """
    A context manager that will lightly wrap around and perform gradient accumulation automatically

    Args:
        model (`torch.nn.Module`):
            PyTorch Module that was prepared with `Accelerator.prepare`
    """
    self._do_sync()
    # Skip gradient synchronization on accumulation-only steps.
    context = contextlib.nullcontext if self.sync_gradients else self.no_sync
    with context(model):
        yield
|
| 427 |
+
|
| 428 |
+
def print(self, *args, **kwargs):
    """
    Use in replacement of `print()` to only print once per server.
    """
    if not self.is_local_main_process:
        return
    print(*args, **kwargs)
|
| 434 |
+
|
| 435 |
+
def _prepare_one(self, obj, first_pass=False):
    """Dispatch one object to the matching `prepare_*` method, or return it untouched."""
    # First pass of preparation: DataLoader, model, optimizer
    if first_pass:
        if isinstance(obj, torch.utils.data.DataLoader):
            return self.prepare_data_loader(obj)
        if isinstance(obj, torch.nn.Module):
            self._models.append(obj)
            return self.prepare_model(obj)
        if isinstance(obj, torch.optim.Optimizer):
            optimizer = self.prepare_optimizer(obj)
            self._optimizers.append(optimizer)
            return optimizer
    # Second pass of preparation: LR scheduler (which need the full list of optimizers)
    elif isinstance(obj, torch.optim.lr_scheduler._LRScheduler):
        scheduler = self.prepare_scheduler(obj)
        self._schedulers.append(scheduler)
        return scheduler
    return obj
|
| 453 |
+
|
| 454 |
+
def _prepare_fsdp(self, *args):
    """
    Re-create the optimizers (and re-link any schedulers) after the model has been wrapped in
    FSDP.

    FSDP flattens parameters during wrapping, so an optimizer built against the unwrapped model
    points at stale parameters and must be rebuilt from the wrapped model's parameters.

    Args:
        *args: The already-prepared objects returned by the main `prepare` pass.

    Returns:
        `tuple`: The same objects, with optimizers rebuilt and schedulers re-pointed.
    """
    result = []
    # NOTE(review): assumes at least one `torch.nn.Module` is in `args`; otherwise `model`
    # below would be unbound — confirm callers guarantee this.
    for obj in args:
        if isinstance(obj, torch.nn.Module):
            model = obj
            break
    optimizers = []

    self._schedulers = []
    self._models = []
    intermediate_result = []
    for obj in args:
        if isinstance(obj, torch.optim.Optimizer):
            if len(obj.param_groups) > 1:
                # `Logger.warn` is deprecated in favor of `Logger.warning`.
                logger.warning(
                    "FSDP Warning: When using FSDP, several parameter groups will be conflated into "
                    "a single one due to nested module wrapping and parameter flattening."
                )
            # Rebuild the raw optimizer on the wrapped model's (flattened) parameters.
            optimizer = obj.optimizer.__class__(model.parameters(), **obj.optimizer.defaults)
            obj = self.prepare_optimizer(optimizer)
            optimizers.append(obj)
        elif isinstance(obj, torch.nn.Module):
            self._models.append(obj)
        intermediate_result.append(obj)

    for obj in intermediate_result:
        if isinstance(obj, AcceleratedScheduler):
            obj.optimizer = optimizers
            # Re-point the scheduler at the rebuilt optimizer matching its old one.
            for i, opt in enumerate(self._optimizers):
                if getattr(obj.scheduler, "optimizer", None) == opt.optimizer:
                    obj.scheduler.optimizer = optimizers[i]
                    obj.optimizers = [optimizers[i]]
                    break
            self._schedulers.append(obj)
        result.append(obj)
    self._optimizers = optimizers
    return tuple(result)
|
| 491 |
+
|
| 492 |
+
def prepare(self, *args):
    """
    Prepare all objects passed in `args` for distributed training and mixed precision, then return them in the same
    order.

    Accepts the following type of objects:

        - `torch.utils.data.DataLoader`: PyTorch Dataloader
        - `torch.nn.Module`: PyTorch Module
        - `torch.optim.Optimizer`: PyTorch Optimizer

    Returns:
        The prepared counterparts, in the same order (a single object if only one was passed).
    """
    if self.distributed_type == DistributedType.FSDP:
        # FSDP flattens parameters when wrapping a model, so optimizers created before
        # wrapping point at stale parameters.
        model_count = 0
        optimizer_present = False
        for obj in args:
            if isinstance(obj, torch.nn.Module):
                model_count += 1
            if isinstance(obj, torch.optim.Optimizer):
                optimizer_present = True
        if model_count > 1 and optimizer_present:
            raise ValueError(
                "For FSDP to work with multiple models (>1), "
                "prepare must be called for all the models before optimizers are created"
            )
        elif model_count == 1 and optimizer_present:
            # `Logger.warn` is deprecated in favor of `Logger.warning`.
            logger.warning(
                "FSDP Warning: When using FSDP, "
                "it is efficient and recommended to call prepare for the model before creating the optimizer"
            )

    # On TPUs, putting the model on the XLA device will create new parameters, so the corresponding optimizer will
    # have parameters disconnected from the model (so no training :-( ).
    # If the model and optimizer have parameters on different devices we raise an error.
    if self.distributed_type == DistributedType.TPU:
        model_device, optimizer_device = self._get_devices()
        if model_device is not None and optimizer_device is not None and model_device != optimizer_device:
            raise ValueError(
                "The model and the optimizer parameters are not on the same device, which probably means you "
                "created an optimizer around your model **before** putting on the device. Make sure the line "
                "model.to(device) is before the optimizer creation in your script or remove it entirely and use "
                "the flag default value for `device_placement` in your `Accelerator` to let it handle that "
                "part for you."
            )

    # When TPU placement moves the parameters, remember the old ones so the optimizers can be
    # re-pointed at the new ones afterwards.
    tpu_should_fix_optimizer = self.device_placement and self.distributed_type == DistributedType.TPU
    if tpu_should_fix_optimizer:
        # 1. grabbing old model parameters
        old_named_params = self._get_named_parameters(*args)

    if self.distributed_type == DistributedType.DEEPSPEED:
        result = self._prepare_deepspeed(*args)
    else:
        result = tuple(self._prepare_one(obj, first_pass=True) for obj in args)
        result = tuple(self._prepare_one(obj) for obj in result)

    if tpu_should_fix_optimizer:
        # 2. grabbing new model parameters
        new_named_params = self._get_named_parameters(*result)
        # 3. building a map from the first to the second
        mapping = {p: new_named_params[n] for n, p in old_named_params.items()}
        # 4. using that map to update the parameters of the optimizer
        for obj in result:
            if isinstance(obj, torch.optim.Optimizer):
                obj._switch_parameters(mapping)

    # Safe thanks to short-circuiting: `model_count`/`optimizer_present` only exist on the FSDP path.
    if self.distributed_type == DistributedType.FSDP and model_count == 1 and optimizer_present:
        result = self._prepare_fsdp(*result)

    return result if len(result) > 1 else result[0]
|
| 562 |
+
|
| 563 |
+
def prepare_model(self, model):
    """
    Move `model` to the right device and wrap it for the active distributed setup
    (DDP, FSDP, multi-CPU DDP or TPU), patching `forward` for native AMP when enabled.

    Args:
        model (`torch.nn.Module`): The model to prepare.

    Returns:
        `torch.nn.Module`: The wrapped (or unchanged) model.
    """
    # FSDP handles its own placement below, so skip the eager `.to(device)` there.
    if self.device_placement and self.distributed_type != DistributedType.FSDP:
        model = model.to(self.device)
    if self.distributed_type == DistributedType.MULTI_GPU:
        kwargs = self.ddp_handler.to_kwargs() if self.ddp_handler is not None else {}
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[self.local_process_index], output_device=self.local_process_index, **kwargs
        )
    elif self.distributed_type == DistributedType.FSDP:
        from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP

        # Check if the model is already a FSDP model due to `Manual Wrapping` and if so,
        # don't wrap it again
        # NOTE(review): exact `type(...) != FSDP` comparison (not isinstance) — a subclass
        # would be re-wrapped; presumably intentional, confirm before changing.
        if type(model) != FSDP:
            self.state.fsdp_plugin.set_auto_wrap_policy(model)
            fsdp_plugin = self.state.fsdp_plugin
            model = FSDP(
                model,
                sharding_strategy=fsdp_plugin.sharding_strategy,
                cpu_offload=fsdp_plugin.cpu_offload,
                auto_wrap_policy=fsdp_plugin.auto_wrap_policy,
                backward_prefetch=fsdp_plugin.backward_prefetch,
                mixed_precision=fsdp_plugin.mixed_precision_policy,
                ignored_modules=fsdp_plugin.ignored_modules,
            )
            # When parameters are not CPU-offloaded, the wrapped model still has to be
            # moved to the target device.
            if not fsdp_plugin.cpu_offload.offload_params:
                model.to(self.device)
    elif self.distributed_type == DistributedType.MULTI_CPU:
        kwargs = self.ddp_handler.to_kwargs() if self.ddp_handler is not None else {}
        model = torch.nn.parallel.DistributedDataParallel(model, **kwargs)
    if self.native_amp:
        # Patch `forward` so it runs under autocast; outputs are converted back to fp32.
        if self.mixed_precision == "fp16" and is_torch_version(">=", "1.10"):
            model.forward = torch.cuda.amp.autocast(dtype=torch.float16)(model.forward)
        elif self.mixed_precision == "bf16" and self.distributed_type != DistributedType.TPU:
            device_type = "cuda" if torch.cuda.is_available() else "cpu"
            model.forward = torch.autocast(device_type=device_type, dtype=torch.bfloat16)(model.forward)
        else:
            model.forward = torch.cuda.amp.autocast()(model.forward)
        model.forward = convert_outputs_to_fp32(model.forward)
    if self.distributed_type == DistributedType.TPU and self.state.fork_launched:
        # Share the model across forked TPU processes instead of copying it.
        model = xmp.MpModelWrapper(model).to(self.device)
    return model
|
| 605 |
+
|
| 606 |
+
def _prepare_deepspeed(self, *args):
    """
    Prepare objects for DeepSpeed: derive batch-size/config values from the dataloaders,
    reconcile code-side vs config-file optimizer/scheduler, call `deepspeed.initialize`,
    and swap the prepared engine/optimizer/scheduler back into the argument list.
    """
    deepspeed_plugin = self.state.deepspeed_plugin

    # Only dataloaders are prepared up-front; everything else is handed to DeepSpeed.
    result = [
        self._prepare_one(obj, first_pass=True) if isinstance(obj, torch.utils.data.DataLoader) else obj
        for obj in args
    ]

    batch_sizes = [obj.batch_size for obj in args if hasattr(obj, "batch_size")]
    if self.split_batches:
        batch_sizes = [batch_size // self.num_processes for batch_size in batch_sizes]
    if len(batch_sizes) == 0:
        raise ValueError(
            "You must specify a training or evaluation dataloader in `accelerate.prepare()` when using DeepSpeed."
        )

    # `is_train_batch_min` decides which dataloader's batch size wins when several are passed.
    batch_size_per_device = min(batch_sizes) if deepspeed_plugin.is_train_batch_min else max(batch_sizes)
    if len(batch_sizes) > 1:
        logger.info(
            "Since you passed both train and evaluation dataloader, `is_train_batch_min` (here "
            f"{deepspeed_plugin.is_train_batch_min} will decide the `train_batch_size` ({batch_size_per_device})."
        )

    config_kwargs = {
        "train_micro_batch_size_per_gpu": batch_size_per_device,
        "train_batch_size": batch_size_per_device
        * deepspeed_plugin.deepspeed_config["gradient_accumulation_steps"]
        * self.num_processes,
        "gradient_clipping": 1.0,
        "zero_optimization.stage3_gather_16bit_weights_on_model_save": False,
    }

    model = None
    optimizer = None
    scheduler = None
    for obj in result:
        if isinstance(obj, torch.nn.Module):
            model = obj
        elif isinstance(obj, (torch.optim.Optimizer, DummyOptim)):
            optimizer = obj
        elif (isinstance(obj, (torch.optim.lr_scheduler._LRScheduler, DummyScheduler))) or (
            type(obj).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES
        ):
            scheduler = obj

    # An optimizer may come from the code OR the DeepSpeed config file, never both.
    if optimizer is not None:
        if "optimizer" in deepspeed_plugin.deepspeed_config and not isinstance(optimizer, (DummyOptim)):
            raise ValueError(
                "You cannot specify an optimizer in the config file and in the code at the same time. "
                "Please remove the optimizer from the config file or "
                "create `accelerate.utils.DummyOptim` in the code."
            )
        elif "optimizer" not in deepspeed_plugin.deepspeed_config and isinstance(optimizer, (DummyOptim)):
            raise ValueError(
                "You cannot create a `DummyOptim` without specifying an optimizer in the config file."
            )

        if isinstance(optimizer, (torch.optim.Optimizer)):
            deepspeed_plugin.deepspeed_config["zero_allow_untested_optimizer"] = True

    # Same exclusivity rule for schedulers.
    if scheduler is not None:
        if "scheduler" in deepspeed_plugin.deepspeed_config and not isinstance(scheduler, (DummyScheduler)):
            raise ValueError(
                "You cannot specify a scheduler in the config file and in the code at the same time. "
                "Please remove the scheduler from the config file or "
                "create `accelerate.utils.DummyScheduler` in the code."
            )
        elif "scheduler" not in deepspeed_plugin.deepspeed_config and isinstance(scheduler, (DummyScheduler)):
            raise ValueError(
                "You cannot create a `DummyScheduler` without specifying a scheduler in the config file."
            )

    if optimizer is not None and scheduler is not None:
        if isinstance(optimizer, (DummyOptim)) and not isinstance(scheduler, (DummyScheduler)):
            raise ValueError(
                "You can only specify `accelerate.utils.DummyScheduler` in the code when using "
                "`accelerate.utils.DummyOptim`."
            )

    if model is not None:
        # Auto-tune ZeRO bucket sizes from the model's hidden size when available.
        if hasattr(model, "config") and hasattr(model.config, "hidden_size"):
            hidden_size = model.config.hidden_size
            config_kwargs.update(
                {
                    "zero_optimization.reduce_bucket_size": hidden_size * hidden_size,
                    "zero_optimization.stage3_prefetch_bucket_size": 0.9 * hidden_size * hidden_size,
                    "zero_optimization.stage3_param_persistence_threshold": 10 * hidden_size,
                }
            )

        if isinstance(optimizer, (DummyOptim)):
            config_kwargs.update(
                {"optimizer.params.lr": optimizer.lr, "optimizer.params.weight_decay": optimizer.weight_decay}
            )
        if isinstance(scheduler, (DummyScheduler)):
            config_kwargs.update(
                {
                    "scheduler.params.warmup_min_lr": 0,
                    "scheduler.params.warmup_max_lr": scheduler.optimizer.lr,
                    "scheduler.params.warmup_num_steps": scheduler.warmup_num_steps,
                }
            )
            if scheduler.total_num_steps is not None:
                config_kwargs["scheduler.params.total_num_steps"] = (
                    math.ceil(scheduler.total_num_steps / self.num_processes)
                    if not self.split_batches
                    else scheduler.total_num_steps
                )
        deepspeed_plugin.deepspeed_config_process(must_match=False, **config_kwargs)
        self.deepspeed_config = deepspeed_plugin.deepspeed_config
        kwargs = dict(model=model, config_params=self.deepspeed_config)
        if optimizer is not None:
            if isinstance(optimizer, (DummyOptim)):
                kwargs["model_parameters"] = optimizer.params
            else:
                kwargs["optimizer"] = optimizer
            if scheduler is not None:
                if type(scheduler).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES:
                    kwargs["lr_scheduler"] = scheduler

        engine, optimizer, _, lr_scheduler = deepspeed.initialize(**kwargs)
        if optimizer is not None:
            optimizer = DeepSpeedOptimizerWrapper(optimizer)
        if scheduler is not None:
            if lr_scheduler is None:
                # DeepSpeed did not build the scheduler itself; wrap the code-side one.
                scheduler = AcceleratedScheduler(
                    scheduler,
                    optimizer,
                    step_with_optimizer=self.step_scheduler_with_optimizer,
                    split_batches=self.split_batches,
                )
            else:
                scheduler = DeepSpeedSchedulerWrapper(lr_scheduler, optimizer)

        # Replace the originals with the DeepSpeed-prepared counterparts, in place.
        for i in range(len(result)):
            if isinstance(result[i], torch.nn.Module):
                result[i] = engine
            elif isinstance(result[i], (torch.optim.Optimizer, DummyOptim)):
                result[i] = optimizer
            elif (isinstance(result[i], (torch.optim.lr_scheduler._LRScheduler, DummyScheduler))) or (
                type(result[i]).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES
            ):
                result[i] = scheduler
        # pointing for deepspeed_engine_wrapped.backward()
        self.deepspeed_engine_wrapped = DeepSpeedEngineWrapper(engine)
        self._models.append(engine)
        if optimizer is not None:
            self._optimizers.append(optimizer)
        if scheduler is not None:
            self._schedulers.append(scheduler)
        if len(self._models) > 1:
            raise AssertionError(
                "You can't use same `Accelerator()` instance with multiple models when using DeepSpeed"
            )
    return tuple(result)
|
| 762 |
+
|
| 763 |
+
def prepare_data_loader(self, data_loader):
    """Shard and wrap `data_loader` for the current distributed setup."""
    # On TPU, placement is handled by the XLA loader itself.
    on_device = self.device_placement if self.distributed_type != DistributedType.TPU else False
    return prepare_data_loader(
        data_loader,
        self.device,
        num_processes=self.num_processes,
        process_index=self.process_index,
        split_batches=self.split_batches,
        put_on_device=on_device,
        rng_types=self.rng_types.copy(),
        dispatch_batches=self.dispatch_batches,
    )
|
| 774 |
+
|
| 775 |
+
def prepare_optimizer(self, optimizer):
    """Wrap `optimizer` in an `AcceleratedOptimizer` tied to this accelerator's scaler."""
    return AcceleratedOptimizer(
        optimizer,
        device_placement=self.device_placement,
        scaler=self.scaler,
    )
|
| 777 |
+
|
| 778 |
+
def prepare_scheduler(self, scheduler):
    """Wrap `scheduler` in an `AcceleratedScheduler` bound to its prepared optimizer(s)."""
    # We try to find the optimizer associated with `scheduler`, the default is the full list.
    matched = self._optimizers
    for opt in self._optimizers:
        if getattr(scheduler, "optimizer", None) == opt.optimizer:
            matched = opt
            break
    return AcceleratedScheduler(
        scheduler,
        matched,
        step_with_optimizer=self.step_scheduler_with_optimizer,
        split_batches=self.split_batches,
    )
|
| 792 |
+
|
| 793 |
+
def backward(self, loss, **kwargs):
    """
    Use `accelerator.backward(loss)` in lieu of `loss.backward()`.
    """
    # Scale down so gradients accumulated over several steps average out correctly.
    loss /= self.gradient_accumulation_steps
    if self.distributed_type == DistributedType.DEEPSPEED:
        # DeepSpeed owns the backward pass (its engine applies loss scaling itself).
        self.deepspeed_engine_wrapped.backward(loss, **kwargs)
        return
    if self.scaler is not None:
        # Native AMP: scale the loss before backprop to avoid fp16 underflow.
        self.scaler.scale(loss).backward(**kwargs)
    else:
        loss.backward(**kwargs)
|
| 804 |
+
|
| 805 |
+
def unscale_gradients(self, optimizer=None):
    """
    Unscale the gradients in mixed precision training with AMP. This is a noop in all other settings.

    Args:
        optimizer (`torch.optim.Optimizer` or `List[torch.optim.Optimizer]`, *optional*):
            The optimizer(s) for which to unscale gradients. If not set, will unscale gradients on all optimizers
            that were passed to [`~Accelerator.prepare`].
    """
    if not (self.use_fp16 and self.native_amp):
        return
    if optimizer is None:
        # TODO: this unscales all optimizers where we should only unscale the one where parameters are.
        optimizers = self._optimizers
    elif isinstance(optimizer, (tuple, list)):
        optimizers = optimizer
    else:
        optimizers = [optimizer]
    for opt in optimizers:
        # Unwrap nested `AcceleratedOptimizer` layers down to the raw torch optimizer.
        while isinstance(opt, AcceleratedOptimizer):
            opt = opt.optimizer
        self.scaler.unscale_(opt)
|
| 824 |
+
|
| 825 |
+
def clip_grad_norm_(self, parameters, max_norm, norm_type=2):
    """
    Should be used in place of `torch.nn.utils.clip_grad_norm_`.
    """
    if self.distributed_type == DistributedType.FSDP:
        self.unscale_gradients()
        parameters = list(parameters)
        # FSDP requires clipping through the wrapped model; match by parameter identity.
        for model in self._models:
            if parameters == list(model.parameters()):
                model.clip_grad_norm_(max_norm, norm_type)
                return
    elif self.distributed_type == DistributedType.DEEPSPEED:
        # `accelerator.backward(loss)` is doing that automatically. Therefore, it's implementation is not needed
        return
    self.unscale_gradients()
    torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=norm_type)
|
| 841 |
+
|
| 842 |
+
def clip_grad_value_(self, parameters, clip_value):
    """
    Should be used in place of `torch.nn.utils.clip_grad_value_`.
    """
    unsupported = (DistributedType.DEEPSPEED, DistributedType.FSDP)
    if self.distributed_type in unsupported:
        raise Exception("DeepSpeed and FSDP do not support `clip_grad_value_`. Use `clip_grad_norm_` instead.")
    self.unscale_gradients()
    torch.nn.utils.clip_grad_value_(parameters, clip_value)
|
| 850 |
+
|
| 851 |
+
def gather(self, tensor):
    """
    Gather the values in *tensor* across all processes and concatenate them on the first dimension. Useful to
    regroup the predictions from all processes when doing evaluation.

    Note:
        This gather happens in all processes.

    Args:
        tensor (`torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`):
            The tensors to gather across all processes.

    Returns:
        `torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`: The gathered tensor(s). Note that the
        first dimension of the result is *num_processes* multiplied by the first dimension of the input tensors.
    """
    # Delegates to the module-level `gather` utility.
    gathered = gather(tensor)
    return gathered
|
| 868 |
+
|
| 869 |
+
def reduce(self, tensor, reduction="sum"):
    """
    Reduce the values in *tensor* across all processes based on *reduction*.

    Note:
        All processes get the reduced value.

    Args:
        tensor (`torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`):
            The tensors to reduce across all processes.
        reduction (`str`, *optional*, defaults to "sum"):
            A reduction type, can be one of 'sum', 'mean', or 'none'. If 'none', will not perform any operation.

    Returns:
        `torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`: The reduced tensor(s).
    """
    # Delegates to the module-level `reduce` utility.
    reduced = reduce(tensor, reduction)
    return reduced
|
| 886 |
+
|
| 887 |
+
def pad_across_processes(self, tensor, dim=0, pad_index=0, pad_first=False):
    """
    Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so
    they can safely be gathered.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to pad.
        pad_index (`int`, *optional*, defaults to 0):
            The value with which to pad.
        pad_first (`bool`, *optional*, defaults to `False`):
            Whether to pad at the beginning or the end.
    """
    # Delegates to the module-level `pad_across_processes` utility.
    padded = pad_across_processes(tensor, dim=dim, pad_index=pad_index, pad_first=pad_first)
    return padded
|
| 903 |
+
|
| 904 |
+
def unwrap_model(self, model):
    """
    Unwraps the `model` from the additional layer possible added by [`~Accelerator.prepare`]. Useful before saving
    the model.

    Args:
        model (`torch.nn.Module`):
            The model to unwrap.
    """
    unwrapped = extract_model_from_parallel(model)
    return unwrapped
|
| 914 |
+
|
| 915 |
+
def wait_for_everyone(self):
    """
    Will stop the execution of the current process until every other process has reached that point (so this does
    nothing when the script is only run in one process). Useful to do before saving a model.
    """
    # Delegates to the module-level barrier utility of the same name.
    wait_for_everyone()
|
| 921 |
+
|
| 922 |
+
def init_trackers(self, project_name: str, config: Optional[dict] = None):
    """
    Initializes a run for all trackers stored in `self.log_with`, potentially with starting configurations

    Args:
        project_name (`str`):
            The name of the project. All trackers will save their data based on this
        config (`dict`, *optional*):
            Optional starting configuration to be logged.
    """
    self.trackers = []
    for tracker in self.log_with:
        if issubclass(type(tracker), GeneralTracker):
            # Custom trackers arrive pre-initialized.
            self.trackers.append(tracker)
            continue
        tracker_init = LOGGER_TYPE_TO_CLASS[str(tracker)]
        if getattr(tracker_init, "requires_logging_directory"):
            # The logging directory was already validated in `__init__`.
            self.trackers.append(tracker_init(project_name, self.logging_dir))
        else:
            self.trackers.append(tracker_init(project_name))
    if config is not None:
        for tracker in self.trackers:
            tracker.store_init_configuration(config)
|
| 947 |
+
|
| 948 |
+
def log(self, values: dict, step: Optional[int] = None):
    """
    Logs `values` to all stored trackers in `self.trackers`.

    Args:
        values (`dict`):
            Values should be a dictionary-like object containing only types `int`, `float`, or `str`.
        step (`int`, *optional*):
            The run step. If included, the log will be affiliated with this step.
    """
    # Only the main process reports, so each value is logged exactly once.
    if not self.is_main_process:
        return
    for tracker in self.trackers:
        tracker.log(values, step=step)
|
| 961 |
+
|
| 962 |
+
def end_training(self):
    """
    Runs any special end training behaviors, such as stopping trackers
    """
    # Trackers were only started on the main process, so only it finishes them.
    if not self.is_main_process:
        return
    for tracker in self.trackers:
        tracker.finish()
|
| 969 |
+
|
| 970 |
+
def save(self, obj, f):
    """
    Save the object passed to disk once per machine. Use in place of `torch.save`.

    Args:
        obj: The object to save.
        f (`str` or `os.PathLike`):
            Where to save the content of `obj`.
    """
    # Delegates to the module-level `save` utility, which handles once-per-machine logic.
    save(obj, f)
|
| 980 |
+
|
| 981 |
+
def save_state(self, output_dir: str):
    """
    Saves the current states of the model, optimizer, scaler, RNG generators, and registered objects.

    Args:
        output_dir (`str` or `os.PathLike`):
            The name of the folder to save all relevant weights and states.

    Returns:
        The location the state was saved to.
    """
    # Make sure the target folder exists before writing anything.
    output_dir = os.path.expanduser(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    logger.info(f"Saving current state to {output_dir}")
    model_weights = [self.get_state_dict(m, unwrap=False) for m in self._models]
    save_location = save_accelerator_state(
        output_dir, model_weights, self._optimizers, self._schedulers, self.state.process_index, self.scaler
    )
    for index, obj in enumerate(self._custom_objects):
        save_custom_state(obj, output_dir, index)
    return save_location
|
| 1000 |
+
|
| 1001 |
+
def load_state(self, input_dir: str):
    """
    Loads the current states of the model, optimizer, scaler, RNG generators, and registered objects.

    Args:
        input_dir (`str` or `os.PathLike`):
            The name of the folder all relevant weights and states were saved in.

    Raises:
        ValueError: If `input_dir` does not point to an existing folder.
    """
    # Check if folder exists
    input_dir = os.path.expanduser(input_dir)
    if not os.path.isdir(input_dir):
        raise ValueError(f"Tried to find {input_dir} but folder does not exist")
    logger.info(f"Loading states from {input_dir}")
    load_accelerator_state(
        input_dir, self._models, self._optimizers, self._schedulers, self.state.process_index, self.scaler
    )
    custom_checkpoints = [f for f in os.listdir(input_dir) if "custom_checkpoint" in f]
    if len(custom_checkpoints) != len(self._custom_objects):
        # A mismatch is not fatal: warn and skip the custom states instead of failing the whole restore.
        err = "Warning! Number of found checkpoints does not match the number of registered objects:"
        err += f"\n\tFound checkpoints: {len(custom_checkpoints)}"
        err += f"\n\tRegistered objects: {len(self._custom_objects)}\nSkipping."
        # Fix: `Logger.warn` is a deprecated alias; `warning` is the supported API.
        logger.warning(err)
    else:
        logger.info(f"Loading in {len(custom_checkpoints)} custom states")
        for index, obj in enumerate(self._custom_objects):
            load_custom_state(obj, input_dir, index)
|
| 1027 |
+
|
| 1028 |
+
def free_memory(self):
    """Release every internal reference (models, optimizers, schedulers, DeepSpeed engine) and
    run the garbage collector, then empty the CUDA cache.

    Call this between two trainings that use different models/optimizers.
    """
    self._models = []
    self._optimizers = []
    self._schedulers = []
    self.deepspeed_engine_wrapped = None
    gc.collect()
    torch.cuda.empty_cache()
|
| 1039 |
+
|
| 1040 |
+
def clear(self):
    """Alias for [`Accelerate.free_memory`]: releases all references to the internally stored
    objects and runs the garbage collector. Use it between two trainings with different
    models/optimizers.
    """
    self.free_memory()
|
| 1046 |
+
|
| 1047 |
+
def _get_named_parameters(self, *args):
|
| 1048 |
+
named_parameters = {}
|
| 1049 |
+
for obj in args:
|
| 1050 |
+
if isinstance(obj, torch.nn.Module):
|
| 1051 |
+
obj = extract_model_from_parallel(obj)
|
| 1052 |
+
named_parameters.update({n: p for n, p in obj.named_parameters()})
|
| 1053 |
+
return named_parameters
|
| 1054 |
+
|
| 1055 |
+
def _get_devices(self, *args):
|
| 1056 |
+
model_device = None
|
| 1057 |
+
optimizer_device = None
|
| 1058 |
+
for obj in args:
|
| 1059 |
+
# Loop through model parameters and stop at the first once we have its device.
|
| 1060 |
+
if isinstance(obj, torch.nn.Module):
|
| 1061 |
+
for param in obj.parameters():
|
| 1062 |
+
model_device = param.device
|
| 1063 |
+
break
|
| 1064 |
+
# Loop through optimizer parameters groups and stop at the first once we have its device.
|
| 1065 |
+
if isinstance(obj, torch.optim.Optimizer):
|
| 1066 |
+
for param_group in obj.param_groups:
|
| 1067 |
+
if len(param_group["params"]) > 0:
|
| 1068 |
+
optimizer_device = param_group["params"][0].device
|
| 1069 |
+
break
|
| 1070 |
+
return (model_device, optimizer_device)
|
| 1071 |
+
|
| 1072 |
+
def get_state_dict(self, model, unwrap=True):
    """
    Returns the state dict of `model`, with any `float16` tensors upcast to `float32`.

    Args:
        model: The (possibly wrapped) model to extract the state dict from.
        unwrap (`bool`, *optional*, defaults to `True`):
            Whether to call `self.unwrap_model` on `model` first. Ignored under DeepSpeed ZeRO-3,
            where the consolidated state dict is fetched from the engine instead.

    Raises:
        ValueError: Under DeepSpeed ZeRO-3 when the config forbids gathering 16bit weights on save.
    """
    is_zero_3 = False
    if self.distributed_type == DistributedType.DEEPSPEED:
        is_zero_3 = self.deepspeed_config["zero_optimization"]["stage"] == 3

    if is_zero_3:
        # Under ZeRO-3 the parameters are sharded across processes, so the DeepSpeed engine
        # must consolidate them into a single 16bit state dict — only allowed by config.
        if model.zero_gather_16bit_weights_on_model_save():
            state_dict = model._zero3_consolidated_16bit_state_dict()
        else:
            raise ValueError(
                "Cannot get 16bit model weights because `stage3_gather_16bit_weights_on_model_save` in DeepSpeed config is False. "
                "To save the model weights in 16bit, set `stage3_gather_16bit_weights_on_model_save` to True in DeepSpeed config file or "
                "set `zero3_save_16bit_model` to True when using `accelerate config`. "
                "To save the full checkpoint, run `model.save_checkpoint(save_dir)` and use `zero_to_fp32.py` to recover weights."
            )
    else:
        if unwrap:
            model = self.unwrap_model(model)
        state_dict = model.state_dict()

    # Upcast fp16 tensors so the returned weights are full precision.
    if state_dict is not None:
        for k in state_dict:
            if state_dict[k].dtype == torch.float16:
                state_dict[k] = state_dict[k].float()

    return state_dict
|
| 1098 |
+
|
| 1099 |
+
def register_for_checkpointing(self, *objects):
    """
    Registers `objects` so that `save_state` / `load_state` will save and restore them.

    Intended for state saved and loaded within the same script; it is not designed to be
    shared across different scripts.

    <Tip>

    Every `object` must have a `load_state_dict` and `state_dict` function to be stored.

    </Tip>
    """
    invalid_objects = [
        candidate
        for candidate in objects
        if not (hasattr(candidate, "state_dict") and hasattr(candidate, "load_state_dict"))
    ]
    if invalid_objects:
        err = "All `objects` must include a `state_dict` and `load_state_dict` function to be stored. The following inputs are invalid:"
        err += "".join(
            f"\n\t- Item at index {index}, `{get_pretty_name(candidate)}`"
            for index, candidate in enumerate(invalid_objects)
        )
        raise ValueError(err)
    self._custom_objects.extend(objects)
|
| 1122 |
+
|
| 1123 |
+
@contextmanager
def autocast(self):
    """
    Will apply automatic mixed-precision inside the block inside this context manager, if it is enabled. Nothing
    different will happen otherwise.
    """
    if self.native_amp:
        if self.mixed_precision == "fp16" and is_torch_version(">=", "1.10"):
            autocast_context = torch.cuda.amp.autocast(dtype=torch.float16)
        elif self.mixed_precision == "bf16" and is_bf16_available():
            if self.distributed_type in [DistributedType.NO, DistributedType.MULTI_CPU, DistributedType.MULTI_GPU]:
                device_type = "cpu" if not torch.cuda.is_available() else "cuda"
                autocast_context = torch.autocast(dtype=torch.bfloat16, device_type=device_type)
            else:
                # Fix: the original left `autocast_context` unbound on this path (the inner `if`
                # had no `else`), raising UnboundLocalError on `__enter__` below.
                autocast_context = torch.cuda.amp.autocast()
        else:
            autocast_context = torch.cuda.amp.autocast()

        autocast_context.__enter__()
        try:
            yield
        finally:
            # Fix: always close the autocast region, even when the wrapped block raises —
            # previously an exception at `yield` skipped `__exit__` entirely.
            autocast_context.__exit__(*sys.exc_info())
    else:
        yield
|
| 1144 |
+
|
| 1145 |
+
@property
def optimizer_step_was_skipped(self):
    """
    Whether the optimizer update was skipped (e.g. due to gradient overflow in mixed
    precision); when `True`, the learning rate should not be advanced.
    """
    return any(optimizer.step_was_skipped for optimizer in self._optimizers)
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/big_modeling.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
from contextlib import contextmanager
|
| 17 |
+
from typing import Dict, List, Optional, Union
|
| 18 |
+
|
| 19 |
+
import torch
|
| 20 |
+
import torch.nn as nn
|
| 21 |
+
|
| 22 |
+
from .hooks import AlignDevicesHook, add_hook_to_module, attach_align_device_hook, attach_align_device_hook_on_blocks
|
| 23 |
+
from .utils import (
|
| 24 |
+
OffloadedWeightsLoader,
|
| 25 |
+
check_device_map,
|
| 26 |
+
extract_submodules_state_dict,
|
| 27 |
+
infer_auto_device_map,
|
| 28 |
+
load_checkpoint_in_model,
|
| 29 |
+
offload_state_dict,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@contextmanager
def init_empty_weights(include_buffers: bool = False):
    """
    A context manager under which models are initialized with all parameters on the meta device, therefore creating an
    empty model. Useful when just initializing the model would blow the available RAM.

    Args:
        include_buffers (`bool`, *optional*, defaults to `False`):
            Whether or not to also put all buffers on the meta device while initializing.

    Example:

    ```python
    import torch.nn as nn
    from accelerate import init_empty_weights

    # Initialize a model with 100 billions parameters in no time and without using any RAM.
    with init_empty_weights():
        tst = nn.Sequential(*[nn.Linear(10000, 10000) for _ in range(1000)])
    ```

    <Tip warning={true}>

    Any model created under this context manager has no weights. As such you can't do something like
    `model.to(some_device)` with it. To load weights inside your empty model, see [`load_checkpoint_and_dispatch`].

    </Tip>
    """
    old_register_parameter = nn.Module.register_parameter
    if include_buffers:
        old_register_buffer = nn.Module.register_buffer

    def register_empty_parameter(module, name, param):
        # Register normally first (so all of PyTorch's validation runs), then swap the stored
        # parameter for a meta-device copy of the same parameter class.
        old_register_parameter(module, name, param)
        if param is not None:
            param_cls = type(module._parameters[name])
            kwargs = module._parameters[name].__dict__
            module._parameters[name] = param_cls(module._parameters[name].to(torch.device("meta")), **kwargs)

    def register_empty_buffer(module, name, buffer):
        # Same trick for buffers; only installed when `include_buffers` is True.
        old_register_buffer(module, name, buffer)
        if buffer is not None:
            module._buffers[name] = module._buffers[name].to(torch.device("meta"))

    try:
        nn.Module.register_parameter = register_empty_parameter
        if include_buffers:
            nn.Module.register_buffer = register_empty_buffer
        yield
    finally:
        # Always restore the original methods, even if an error occurred inside the block.
        nn.Module.register_parameter = old_register_parameter
        if include_buffers:
            nn.Module.register_buffer = old_register_buffer
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def cpu_offload(
    model: nn.Module,
    execution_device: Optional[torch.device] = None,
    offload_buffers: bool = False,
    state_dict: Optional[Dict[str, torch.Tensor]] = None,
    preload_module_classes: Optional[List[str]] = None,
):
    """
    Activates full CPU offload for a model: all parameters are offloaded and a single copy of the
    model's state dict is kept on CPU. During the forward pass, parameters are pulled from that
    state dict onto the execution device as needed, then offloaded again.

    Args:
        model (`torch.nn.Module`):
            The model to offload.
        execution_device (`torch.device`, *optional*):
            The device on which the forward pass of the model will be executed (should be a GPU). Will default to the
            model first parameter device.
        offload_buffers (`bool`, *optional*, defaults to `False`):
            Whether or not to offload the buffers with the model parameters.
        state_dict (`Dict[str, torch.Tensor]`, *optional*):
            The state dict of the model that will be kept on CPU.
        preload_module_classes (`List[str]`, *optional*):
            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
            of the forward. This should only be used for classes that have submodules which are registered but not
            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
    """
    if execution_device is None:
        # Default to wherever the model currently lives.
        execution_device = next(iter(model.parameters())).device
    if state_dict is None:
        state_dict = {name: tensor.to("cpu") for name, tensor in model.state_dict().items()}
    attach_align_device_hook(
        model,
        execution_device=execution_device,
        offload=True,
        offload_buffers=offload_buffers,
        weights_map=state_dict,
        preload_module_classes=preload_module_classes,
    )
    add_hook_to_module(model, AlignDevicesHook(io_same_device=True))
    return model
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def disk_offload(
    model: nn.Module,
    offload_dir: Union[str, os.PathLike],
    execution_device: Optional[torch.device] = None,
    offload_buffers: bool = False,
    preload_module_classes: Optional[List[str]] = None,
):
    """
    Activates full disk offload for a model: every parameter is stored as a memory-mapped array in
    `offload_dir`. During the forward pass, parameters are read from that folder onto the execution
    device as needed, then offloaded again.

    Args:
        model (`torch.nn.Module`): The model to offload.
        offload_dir (`str` or `os.PathLike`):
            The folder in which to offload the model weights (or where the model weights are already offloaded).
        execution_device (`torch.device`, *optional*):
            The device on which the forward pass of the model will be executed (should be a GPU). Will default to the
            model's first parameter device.
        offload_buffers (`bool`, *optional*, defaults to `False`):
            Whether or not to offload the buffers with the model parameters.
        preload_module_classes (`List[str]`, *optional*):
            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
            of the forward. This should only be used for classes that have submodules which are registered but not
            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
    """
    index_file = os.path.join(offload_dir, "index.json")
    if not os.path.isdir(offload_dir) or not os.path.isfile(index_file):
        # The folder is missing or incomplete: write the model's weights out first.
        offload_state_dict(offload_dir, model.state_dict())
    if execution_device is None:
        execution_device = next(iter(model.parameters())).device
    weights_map = OffloadedWeightsLoader(save_folder=offload_dir)
    attach_align_device_hook(
        model,
        execution_device=execution_device,
        offload=True,
        offload_buffers=offload_buffers,
        weights_map=weights_map,
        preload_module_classes=preload_module_classes,
    )
    add_hook_to_module(model, AlignDevicesHook(io_same_device=True))
    return model
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def dispatch_model(
    model: nn.Module,
    device_map: Dict[str, Union[str, int, torch.device]],
    main_device: Optional[torch.device] = None,
    state_dict: Optional[Dict[str, torch.Tensor]] = None,
    offload_dir: Union[str, os.PathLike] = None,
    offload_buffers: bool = False,
    preload_module_classes: Optional[List[str]] = None,
):
    """
    Dispatches a model according to a given device map. Layers of the model might be spread across GPUs, offloaded on
    the CPU or even the disk.

    Args:
        model (`torch.nn.Module`):
            The model to dispatch.
        device_map (`Dict[str, Union[str, int, torch.device]]`):
            A dictionary mapping module names in the models `state_dict` to the device they should go to. Note that
            `"disk"` is accepted even if it's not a proper value for `torch.device`.
        main_device (`str`, `int` or `torch.device`, *optional*):
            The main execution device. Will default to the first device in the `device_map` different from `"cpu"` or
            `"disk"`, or to `"cpu"` when there is no such device.
        state_dict (`Dict[str, torch.Tensor]`, *optional*):
            The state dict of the part of the model that will be kept on CPU.
        offload_dir (`str` or `os.PathLike`):
            The folder in which to offload the model weights (or where the model weights are already offloaded).
        offload_buffers (`bool`, *optional*, defaults to `False`):
            Whether or not to offload the buffers with the model parameters.
        preload_module_classes (`List[str]`, *optional*):
            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
            of the forward. This should only be used for classes that have submodules which are registered but not
            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.

    Raises:
        ValueError: If some modules are mapped to `"disk"` but no `offload_dir` is provided.
    """
    # Error early if the device map is incomplete.
    check_device_map(model, device_map)

    if main_device is None:
        execution_candidates = [d for d in device_map.values() if d not in ["cpu", "disk"]]
        # Fix: the original indexed `[0]` unconditionally, raising IndexError when every module
        # is mapped to "cpu"/"disk". Fall back to CPU execution in that case.
        main_device = execution_candidates[0] if len(execution_candidates) > 0 else "cpu"

    cpu_modules = [name for name, device in device_map.items() if device == "cpu"]
    if state_dict is None and len(cpu_modules) > 0:
        state_dict = extract_submodules_state_dict(model.state_dict(), cpu_modules)

    disk_modules = [name for name, device in device_map.items() if device == "disk"]
    if offload_dir is None and len(disk_modules) > 0:
        raise ValueError(
            "We need an `offload_dir` to dispatch this model according to this `device_map`, the following submodules "
            f"need to be offloaded: {', '.join(disk_modules)}."
        )
    if len(disk_modules) > 0 and (
        not os.path.isdir(offload_dir) or not os.path.isfile(os.path.join(offload_dir, "index.json"))
    ):
        # Offload folder missing or incomplete: dump the disk-bound weights now.
        disk_state_dict = extract_submodules_state_dict(model.state_dict(), disk_modules)
        offload_state_dict(offload_dir, disk_state_dict)

    # Offloaded modules execute on the main device; everything else stays where the map says.
    execution_device = {
        name: main_device if device in ["cpu", "disk"] else device for name, device in device_map.items()
    }
    offload = {name: device in ["cpu", "disk"] for name, device in device_map.items()}
    save_folder = offload_dir if len(disk_modules) > 0 else None
    if state_dict is not None or save_folder is not None:
        weights_map = OffloadedWeightsLoader(state_dict=state_dict, save_folder=save_folder)
    else:
        weights_map = None

    attach_align_device_hook_on_blocks(
        model,
        execution_device=execution_device,
        offload=offload,
        offload_buffers=offload_buffers,
        weights_map=weights_map,
        preload_module_classes=preload_module_classes,
    )
    # Expose the final placement so downstream tooling can inspect it.
    model.hf_device_map = device_map
    return model
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def load_checkpoint_and_dispatch(
    model: nn.Module,
    checkpoint: Union[str, os.PathLike],
    device_map: Optional[Union[str, Dict[str, Union[int, str, torch.device]]]] = None,
    max_memory: Optional[Dict[Union[int, str], Union[int, str]]] = None,
    no_split_module_classes: Optional[List[str]] = None,
    offload_folder: Optional[Union[str, os.PathLike]] = None,
    offload_buffers: bool = False,
    dtype: Optional[Union[str, torch.dtype]] = None,
    offload_state_dict: bool = False,
    preload_module_classes: Optional[List[str]] = None,
):
    """
    Loads a (potentially sharded) checkpoint inside a model, potentially sending weights to a given device as they are
    loaded and adds the various hooks that will make this model run properly (even if split across devices).

    Args:
        model (`torch.nn.Module`): The model in which we want to load a checkpoint.
        checkpoint (`str` or `os.PathLike`):
            The folder checkpoint to load. It can be:
            - a path to a file containing a whole model state dict
            - a path to a `.json` file containing the index to a sharded checkpoint
            - a path to a folder containing a unique `.index.json` file and the shards of a checkpoint.
        device_map (`Dict[str, Union[int, str, torch.device]]`, *optional*):
            A map that specifies where each submodule should go. It doesn't need to be refined to each parameter/buffer
            name, once a given module name is inside, every submodule of it will be sent to the same device.

            To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`.
        max_memory (`Dict`, *optional*):
            A dictionary device identifier to maximum memory. Will default to the maximum memory available for each GPU
            and the available CPU RAM if unset.
        no_split_module_classes (`List[str]`, *optional*):
            A list of layer class names that should never be split across device (for instance any layer that has a
            residual connection).
        offload_folder (`str` or `os.PathLike`, *optional*):
            If the `device_map` contains any value `"disk"`, the folder where we will offload weights.
        offload_buffers (`bool`, *optional*, defaults to `False`):
            In the layers that are offloaded on the CPU or the hard drive, whether or not to offload the buffers as
            well as the parameters.
        dtype (`str` or `torch.dtype`, *optional*):
            If provided, the weights will be converted to that type when loaded.
        offload_state_dict (`bool`, *optional*, defaults to `False`):
            If `True`, will temporarily offload the CPU state dict on the hard drive to avoid getting out of CPU RAM if
            the weight of the CPU state dict + the biggest shard does not fit.
        preload_module_classes (`List[str]`, *optional*):
            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
            of the forward. This should only be used for classes that have submodules which are registered but not
            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
    """
    # "auto" resolves the device map from available memory before any weight is loaded.
    if device_map == "auto":
        device_map = infer_auto_device_map(
            model, max_memory=max_memory, no_split_module_classes=no_split_module_classes, dtype=dtype
        )
    load_checkpoint_in_model(
        model,
        checkpoint,
        device_map=device_map,
        offload_folder=offload_folder,
        dtype=dtype,
        offload_state_dict=offload_state_dict,
    )
    # Without a device map there is nothing to dispatch: the checkpoint is simply loaded in place.
    if device_map is None:
        return model
    return dispatch_model(
        model,
        device_map=device_map,
        offload_dir=offload_folder,
        offload_buffers=offload_buffers,
        preload_module_classes=preload_module_classes,
    )
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/checkpointing.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import random
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import List
|
| 19 |
+
|
| 20 |
+
import numpy as np
|
| 21 |
+
import torch
|
| 22 |
+
from torch.cuda.amp import GradScaler
|
| 23 |
+
|
| 24 |
+
from .utils import (
|
| 25 |
+
MODEL_NAME,
|
| 26 |
+
OPTIMIZER_NAME,
|
| 27 |
+
RNG_STATE_NAME,
|
| 28 |
+
SCALER_NAME,
|
| 29 |
+
SCHEDULER_NAME,
|
| 30 |
+
get_pretty_name,
|
| 31 |
+
is_tpu_available,
|
| 32 |
+
save,
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
if is_tpu_available(check_device=False):
|
| 37 |
+
import torch_xla.core.xla_model as xm
|
| 38 |
+
|
| 39 |
+
from .logging import get_logger
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
logger = get_logger(__name__)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def save_accelerator_state(
    output_dir: str,
    model_states: List[dict],
    optimizers: list,
    schedulers: list,
    process_index: int,
    scaler: GradScaler = None,
):
    """
    Saves the current states of the models, optimizers, scaler, and RNG generators to a given directory.

    Args:
        output_dir (`str` or `os.PathLike`):
            The name of the folder to save all relevant weights and states.
        model_states (`List[torch.nn.Module]`):
            A list of model states
        optimizers (`List[torch.optim.Optimizer]`):
            A list of optimizer instances
        schedulers (`List[torch.optim.lr_scheduler._LRScheduler]`):
            A list of learning rate schedulers
        process_index (`int`):
            The current process index in the Accelerator state
        scaler (`torch.cuda.amp.GradScaler`, *optional*):
            An optional gradient scaler instance to save

    Returns:
        `output_dir`, the folder everything was saved into.
    """
    # Model states
    # The first object of each kind keeps the plain name; subsequent ones get an index suffix.
    for i, state in enumerate(model_states):
        weights_name = f"{MODEL_NAME}.bin" if i == 0 else f"{MODEL_NAME}_{i}.bin"
        output_model_file = os.path.join(output_dir, weights_name)
        save(state, output_model_file)
        logger.info(f"Model weights saved in {output_model_file}")
    # Optimizer states
    for i, opt in enumerate(optimizers):
        state = opt.state_dict()
        optimizer_name = f"{OPTIMIZER_NAME}.bin" if i == 0 else f"{OPTIMIZER_NAME}_{i}.bin"
        output_optimizer_file = os.path.join(output_dir, optimizer_name)
        save(state, output_optimizer_file)
        logger.info(f"Optimizer state saved in {output_optimizer_file}")
    # Scheduler states
    for i, scheduler in enumerate(schedulers):
        state = scheduler.state_dict()
        scheduler_name = f"{SCHEDULER_NAME}.bin" if i == 0 else f"{SCHEDULER_NAME}_{i}.bin"
        output_scheduler_file = os.path.join(output_dir, scheduler_name)
        save(state, output_scheduler_file)
        logger.info(f"Scheduler state saved in {output_scheduler_file}")
    # GradScaler state
    # NOTE(review): scaler/RNG states use `torch.save` directly rather than the `save` helper —
    # presumably because they are process-local and need no distributed handling; confirm upstream.
    if scaler is not None:
        state = scaler.state_dict()
        output_scaler_file = os.path.join(output_dir, SCALER_NAME)
        torch.save(state, output_scaler_file)
        logger.info(f"Gradient scaler state saved in {output_scaler_file}")
    # Random number generator states
    # One RNG file per process, keyed by `process_index`, so each rank restores its own streams.
    states = {}
    states_name = f"{RNG_STATE_NAME}_{process_index}.pkl"
    states["random_state"] = random.getstate()
    states["numpy_random_seed"] = np.random.get_state()
    states["torch_manual_seed"] = torch.get_rng_state()
    states["torch_cuda_manual_seed"] = torch.cuda.get_rng_state_all()
    # ^^ safe to call this function even if cuda is not available
    if is_tpu_available():
        states["xm_seed"] = xm.get_rng_state()
    output_states_file = os.path.join(output_dir, states_name)
    torch.save(states, output_states_file)
    logger.info(f"Random states saved in {output_states_file}")
    return output_dir
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def load_accelerator_state(input_dir, models, optimizers, schedulers, process_index, scaler=None):
    """
    Loads states of the models, optimizers, scaler, and RNG generators from a given directory.

    Args:
        input_dir (`str` or `os.PathLike`):
            The name of the folder to load all relevant weights and states.
        models (`List[torch.nn.Module]`):
            A list of model instances
        optimizers (`List[torch.optim.Optimizer]`):
            A list of optimizer instances
        schedulers (`List[torch.optim.lr_scheduler._LRScheduler]`):
            A list of learning rate schedulers
        process_index (`int`):
            The current process index in the Accelerator state
        scaler (`torch.cuda.amp.GradScaler`, *optional*):
            An optional *GradScaler* instance to load
    """

    def _checkpoint_file(prefix, index):
        # The first object of each kind keeps the bare file name so older checkpoints stay loadable;
        # subsequent ones get an `_<index>` suffix.
        return f"{prefix}.bin" if index == 0 else f"{prefix}_{index}.bin"

    # Model states
    for idx, model in enumerate(models):
        weights_path = os.path.join(input_dir, _checkpoint_file(MODEL_NAME, idx))
        model.load_state_dict(torch.load(weights_path, map_location="cpu"))
    logger.info("All model weights loaded successfully")

    # Optimizer states
    for idx, optimizer in enumerate(optimizers):
        optimizer_path = os.path.join(input_dir, _checkpoint_file(OPTIMIZER_NAME, idx))
        optimizer.load_state_dict(torch.load(optimizer_path, map_location="cpu"))
    logger.info("All optimizer states loaded successfully")

    # Scheduler states
    for idx, scheduler in enumerate(schedulers):
        scheduler_path = os.path.join(input_dir, _checkpoint_file(SCHEDULER_NAME, idx))
        scheduler.load_state_dict(torch.load(scheduler_path))
    logger.info("All scheduler states loaded successfully")

    # GradScaler state
    if scaler is not None:
        scaler_path = os.path.join(input_dir, SCALER_NAME)
        scaler.load_state_dict(torch.load(scaler_path))
    logger.info("GradScaler state loaded successfully")

    # Random states: restore every RNG that `save_accelerator_state` captured for this process.
    rng_path = os.path.join(input_dir, f"{RNG_STATE_NAME}_{process_index}.pkl")
    states = torch.load(rng_path)
    random.setstate(states["random_state"])
    np.random.set_state(states["numpy_random_seed"])
    torch.set_rng_state(states["torch_manual_seed"])
    # Safe to call even when CUDA is unavailable (the per-device state list is empty then).
    torch.cuda.set_rng_state_all(states["torch_cuda_manual_seed"])
    if is_tpu_available():
        xm.set_rng_state(states["xm_seed"])
    logger.info("All random states loaded successfully")
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def save_custom_state(obj, path, index: int = 0):
    """
    Saves the state of `obj` to `{path}/custom_checkpoint_{index}.pkl`

    Args:
        obj: Any object exposing a `state_dict()` method.
        path: Directory in which to write the checkpoint file.
        index (`int`, *optional*, defaults to 0):
            Suffix distinguishing several registered custom objects.
    """
    # Should this be the right way to get a qual_name type value from `obj`?
    state = obj.state_dict()
    destination = Path(path) / f"custom_checkpoint_{index}.pkl"
    logger.info(f"Saving the state of {get_pretty_name(obj)} to {destination}")
    torch.save(state, destination)
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def load_custom_state(obj, path, index: int = 0):
    """
    Loads the state of `obj` at `{path}/custom_checkpoint_{index}.pkl`

    Args:
        obj: Any object exposing a `load_state_dict()` method; typically the same object
            previously saved with `save_custom_state`.
        path: Directory containing the checkpoint file.
        index (`int`, *optional*, defaults to 0):
            Suffix distinguishing several registered custom objects.
    """
    # Build the path with `pathlib` for consistency with `save_custom_state` (which uses
    # `Path(path) / ...`); plain f-string concatenation breaks on `os.PathLike` inputs
    # that are not strings and duplicates the file-name convention in two styles.
    load_location = Path(path) / f"custom_checkpoint_{index}.pkl"
    logger.info(f"Loading the state of {get_pretty_name(obj)} from {load_location}")
    obj.load_state_dict(torch.load(load_location))
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/data_loader.py
ADDED
|
@@ -0,0 +1,654 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2021 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import math
|
| 16 |
+
from typing import List, Optional, Union
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from torch.utils.data import BatchSampler, DataLoader, IterableDataset
|
| 20 |
+
|
| 21 |
+
from .logging import get_logger
|
| 22 |
+
from .state import AcceleratorState, DistributedType, GradientState, is_tpu_available
|
| 23 |
+
from .utils import (
|
| 24 |
+
RNGType,
|
| 25 |
+
broadcast,
|
| 26 |
+
broadcast_object_list,
|
| 27 |
+
concatenate,
|
| 28 |
+
find_batch_size,
|
| 29 |
+
get_data_structure,
|
| 30 |
+
initialize_tensors,
|
| 31 |
+
is_torch_version,
|
| 32 |
+
send_to_device,
|
| 33 |
+
slice_tensors,
|
| 34 |
+
synchronize_rng_states,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Only define the XLA wrapper when torch_xla is importable; `check_device=False` avoids
# touching an actual TPU device at import time.
if is_tpu_available(check_device=False):
    import torch_xla.distributed.parallel_loader as xpl

    class MpDeviceLoaderWrapper(xpl.MpDeviceLoader):
        """
        Wrapper for the xpl.MpDeviceLoader class that knows the total batch size.

        **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
        Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
        number of processes
        """

        @property
        def total_batch_size(self):
            # Delegate to the wrapped loader; `_loader` is presumably the underlying
            # `DataLoaderShard` given to `MpDeviceLoader` — confirm against torch_xla.
            return self._loader.total_batch_size
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
logger = get_logger(__name__)

# kwargs of the DataLoader in min version 1.4.0.
# Used as the reference set of keyword arguments (with their defaults) that can be copied
# from a user's DataLoader when re-building a sharded/dispatched one.
_PYTORCH_DATALOADER_KWARGS = {
    "batch_size": 1,
    "shuffle": False,
    "sampler": None,
    "batch_sampler": None,
    "num_workers": 0,
    "collate_fn": None,
    "pin_memory": False,
    "drop_last": False,
    "timeout": 0,
    "worker_init_fn": None,
    "multiprocessing_context": None,
}

# kwargs added after by version: each entry lists the DataLoader keyword arguments that
# first appeared in that PyTorch release, with their defaults.
_PYTORCH_DATALOADER_ADDITIONAL_KWARGS = {
    "1.6.0": {"generator": None},
    "1.7.0": {"prefetch_factor": 2, "persistent_workers": False},
}

# Merge in the version-gated kwargs supported by the installed PyTorch.
for v, additional_kwargs in _PYTORCH_DATALOADER_ADDITIONAL_KWARGS.items():
    if is_torch_version(">=", v):
        _PYTORCH_DATALOADER_KWARGS.update(additional_kwargs)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
class BatchSamplerShard(BatchSampler):
    """
    Wraps a PyTorch `BatchSampler` to generate batches for one of the processes only. Instances of this class will
    always yield a number of batches that is a round multiple of `num_processes` and that all have the same size.
    Depending on the value of the `drop_last` attribute of the batch sampler passed, it will either stop the iteration
    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.

    Args:
        batch_sampler (`torch.utils.data.sampler.BatchSampler`):
            The batch sampler to split in several shards.
        num_processes (`int`, *optional*, defaults to 1):
            The number of processes running concurrently.
        process_index (`int`, *optional*, defaults to 0):
            The index of the current process.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
            yielding different full batches on each process.

            On two processes with a sampler of `[[0, 1, 2, 3], [4, 5, 6, 7]]`, this will result in:

            - the sampler on process 0 to yield `[0, 1, 2, 3]` and the sampler on process 1 to yield `[4, 5, 6, 7]` if
              this argument is set to `False`.
            - the sampler on process 0 to yield `[0, 1]` then `[4, 5]` and the sampler on process 1 to yield `[2, 3]`
              then `[6, 7]` if this argument is set to `True`.

    <Tip warning={true}>

    This does not support `BatchSampler` with varying batch size yet.

    </Tip>"""

    def __init__(
        self,
        batch_sampler: BatchSampler,
        num_processes: int = 1,
        process_index: int = 0,
        split_batches: bool = False,
    ):
        # In split mode each process takes batch_size // num_processes indices of every
        # batch, so the batch size must divide evenly.
        if split_batches and batch_sampler.batch_size % num_processes != 0:
            raise ValueError(
                f"To use `BatchSamplerShard` in `split_batches` mode, the batch size ({batch_sampler.batch_size}) "
                f"needs to be a round multiple of the number of processes ({num_processes})."
            )
        self.batch_sampler = batch_sampler
        self.num_processes = num_processes
        self.process_index = process_index
        self.split_batches = split_batches
        # Mirror the wrapped sampler's batch_size/drop_last so this object quacks like a BatchSampler.
        self.batch_size = batch_sampler.batch_size
        self.drop_last = batch_sampler.drop_last

    def __len__(self):
        if self.split_batches:
            # Each process sees every batch (a slice of it), so the length is unchanged.
            return len(self.batch_sampler)
        if len(self.batch_sampler) % self.num_processes == 0:
            return len(self.batch_sampler) // self.num_processes
        length = len(self.batch_sampler) // self.num_processes
        # Without drop_last the trailing partial round is completed by cycling, adding one batch.
        return length if self.drop_last else length + 1

    def __iter__(self):
        return self._iter_with_split() if self.split_batches else self._iter_with_no_split()

    def _iter_with_split(self):
        initial_data = []
        batch_length = self.batch_sampler.batch_size // self.num_processes
        for idx, batch in enumerate(self.batch_sampler):
            if idx == 0:
                # Remember the first batch so a short final batch can be padded from it.
                initial_data = batch
            if len(batch) == self.batch_size:
                # If the batch is full, we yield the part of it this process is responsible of.
                yield batch[batch_length * self.process_index : batch_length * (self.process_index + 1)]

        # If drop_last is True or the last batch was full, iteration is over, otherwise...
        if not self.drop_last and len(initial_data) > 0 and len(batch) < self.batch_size:
            # For degenerate cases where the dataset has less than num_process * batch_size samples
            while len(initial_data) < self.batch_size:
                initial_data += initial_data
            # Pad the short final batch with indices from the beginning, then slice as usual.
            batch = batch + initial_data
            yield batch[batch_length * self.process_index : batch_length * (self.process_index + 1)]

    def _iter_with_no_split(self):
        initial_data = []
        batch_to_yield = []
        for idx, batch in enumerate(self.batch_sampler):
            # We gather the initial indices in case we need to circle back at the end.
            if not self.drop_last and idx < self.num_processes:
                initial_data += batch
            # We identify the batch to yield but wait until we are sure every process gets a full batch before
            # actually yielding it.
            if idx % self.num_processes == self.process_index:
                batch_to_yield = batch
            if idx % self.num_processes == self.num_processes - 1 and len(batch) == self.batch_size:
                yield batch_to_yield
                batch_to_yield = []

        # If drop_last is True, iteration is over, otherwise...
        if not self.drop_last and len(initial_data) > 0:
            # ... we yield the complete batch we had saved before if it has the proper length
            if len(batch_to_yield) == self.batch_size:
                yield batch_to_yield

            # For degenerate cases where the dataset has less than num_process * batch_size samples
            while len(initial_data) < self.num_processes * self.batch_size:
                initial_data += initial_data

            # If the last batch seen was of the proper size, it has been yielded by its process so we move to the next
            if len(batch) == self.batch_size:
                batch = []
                idx += 1

            # Make sure we yield a multiple of self.num_processes batches
            cycle_index = 0
            while idx % self.num_processes != 0 or len(batch) > 0:
                # Top up `batch` to a full batch with recycled initial indices, yielding only the
                # batches whose round-robin slot belongs to this process.
                end_index = cycle_index + self.batch_size - len(batch)
                batch += initial_data[cycle_index:end_index]
                if idx % self.num_processes == self.process_index:
                    yield batch
                cycle_index = end_index
                batch = []
                idx += 1
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
class IterableDatasetShard(IterableDataset):
    """
    Wraps a PyTorch `IterableDataset` to generate samples for one of the processes only. Instances of this class will
    always yield a number of samples that is a round multiple of the actual batch size (depending of the value of
    `split_batches`, this is either `batch_size` or `batch_size x num_processes`). Depending on the value of the
    `drop_last` attribute of the batch sampler passed, it will either stop the iteration at the first batch that would
    be too small or loop with indices from the beginning.

    Args:
        dataset (`torch.utils.data.dataset.IterableDataset`):
            The batch sampler to split in several shards.
        batch_size (`int`, *optional*, defaults to 1):
            The size of the batches per shard (if `split_batches=False`) or the size of the batches (if
            `split_batches=True`).
        drop_last (`bool`, *optional*, defaults to `False`):
            Whether or not to drop the last incomplete batch or complete the last batches by using the samples from the
            beginning.
        num_processes (`int`, *optional*, defaults to 1):
            The number of processes running concurrently.
        process_index (`int`, *optional*, defaults to 0):
            The index of the current process.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
            yielding different full batches on each process.

            On two processes with an iterable dataset yielding of `[0, 1, 2, 3, 4, 5, 6, 7]`, this will result in:

            - the shard on process 0 to yield `[0, 1, 2, 3]` and the shard on process 1 to yield `[4, 5, 6, 7]` if this
              argument is set to `False`.
            - the shard on process 0 to yield `[0, 1, 4, 5]` and the sampler on process 1 to yield `[2, 3, 6, 7]` if
              this argument is set to `True`.
    """

    def __init__(
        self,
        dataset: IterableDataset,
        batch_size: int = 1,
        drop_last: bool = False,
        num_processes: int = 1,
        process_index: int = 0,
        split_batches: bool = False,
    ):
        # In split mode every process receives batch_size // num_processes samples of each
        # batch, so the batch size must divide evenly (a batch size of 1 is always accepted).
        if split_batches and batch_size > 1 and batch_size % num_processes != 0:
            raise ValueError(
                f"To use `IterableDatasetShard` in `split_batches` mode, the batch size ({batch_size}) "
                f"needs to be a round multiple of the number of processes ({num_processes})."
            )
        self.dataset = dataset
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.num_processes = num_processes
        self.process_index = process_index
        self.split_batches = split_batches

    def __iter__(self):
        # Size of one "global" batch gathered before sharding, and of this process's share of it.
        if self.split_batches:
            global_batch_size = self.batch_size
            shard_size = self.batch_size // self.num_processes
        else:
            global_batch_size = self.batch_size * self.num_processes
            shard_size = self.batch_size
        shard_start = self.process_index * shard_size
        shard_stop = shard_start + shard_size

        saved_first_batch = None
        buffer = []
        for sample in self.dataset:
            buffer.append(sample)
            # Only emit once a full global batch has been gathered, then hand this
            # process its contiguous slice of it.
            if len(buffer) == global_batch_size:
                yield from buffer[shard_start:shard_stop]
                if saved_first_batch is None:
                    # Keep a copy of the first full batch to pad a short trailing batch.
                    saved_first_batch = list(buffer)
                buffer = []

        # Unless drop_last, complete a trailing partial batch with samples from the beginning.
        if not self.drop_last and buffer:
            if saved_first_batch is None:
                saved_first_batch = list(buffer)
            while len(buffer) < global_batch_size:
                buffer += saved_first_batch
            yield from buffer[shard_start:shard_stop]
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
class DataLoaderShard(DataLoader):
    """
    Subclass of a PyTorch `DataLoader` that will deal with device placement and current distributed setup.

    Args:
        dataset (`torch.utils.data.dataset.Dataset`):
            The dataset to use to build this dataloader.
        device (`torch.device`, *optional*):
            If passed, the device to put all batches on.
        rng_types (list of `str` or [`~utils.RNGType`]):
            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
            several of:

            - `"torch"`: the base torch random number generator
            - `"cuda"`: the CUDA random number generator (GPU only)
            - `"xla"`: the XLA random number generator (TPU only)
            - `"generator"`: an optional `torch.Generator`
        generator (`torch.Generator`, *optional*):
            A random number generator to keep synchronized across processes.
        kwargs:
            All other keyword arguments to pass to the regular `DataLoader` initialization.

    **Available attributes:**

    - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
      Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
      number of processes
    """

    def __init__(self, dataset, device=None, rng_types=None, generator=None, **kwargs):
        super().__init__(dataset, **kwargs)
        self.device = device
        self.rng_types = rng_types
        self.generator = generator
        self.gradient_state = GradientState()

    def __iter__(self):
        """Yield batches, optionally moved to `self.device`, flagging the last one in the gradient state."""
        if self.rng_types is not None:
            synchronize_rng_states(self.rng_types, self.generator)
        self.gradient_state._set_end_of_dataloader(False)
        dataloader_iter = super().__iter__()
        # We iterate one batch ahead to check when we are at the end
        try:
            current_batch = next(dataloader_iter)
        except StopIteration:
            # BUGFIX: the previous code did a bare `yield` here, handing the consumer a `None`
            # batch and then crashing with a `NameError` on the undefined `current_batch` in
            # the loop below. An empty dataloader should simply yield nothing.
            self.gradient_state._set_end_of_dataloader(True)
            return
        while True:
            try:
                # But we still move it to the device so it is done before `StopIteration` is reached
                if self.device is not None:
                    current_batch = send_to_device(current_batch, self.device)
                next_batch = next(dataloader_iter)
                yield current_batch
                current_batch = next_batch
            except StopIteration:
                # The look-ahead failed: `current_batch` is the final batch.
                self.gradient_state._set_end_of_dataloader(True)
                yield current_batch
                break

    @property
    def total_batch_size(self):
        # `batch_sampler` is expected to be a `BatchSamplerShard` here, which carries
        # `split_batches`/`num_processes` on top of the regular sampler attributes.
        return (
            self.batch_sampler.batch_size
            if self.batch_sampler.split_batches
            else (self.batch_sampler.batch_size * self.batch_sampler.num_processes)
        )
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
class DataLoaderDispatcher(DataLoader):
    """
    Args:
        Subclass of a PyTorch `DataLoader` that will iterate and preprocess on process 0 only, then dispatch on each
        process their part of the batch.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing of
            `num_processes` batches at each iteration). Another way to see this is that the observed batch size will be
            the same as the initial `dataloader` if this option is set to `True`, the batch size of the initial
            `dataloader` multiplied by `num_processes` otherwise. Setting this option to `True` requires that the batch
            size of the `dataloader` is a round multiple of `batch_size`.

    **Available attributes:**

    - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
      Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
      number of processes
    """

    def __init__(self, dataset, split_batches: bool = False, **kwargs):
        shuffle = False
        if is_torch_version(">=", "1.11.0"):
            from torch.utils.data.datapipes.iter.combinatorics import ShufflerIterDataPipe

            # We need to save the shuffling state of the DataPipe
            if isinstance(dataset, ShufflerIterDataPipe):
                shuffle = dataset._shuffle_enabled
        super().__init__(dataset, **kwargs)
        self.split_batches = split_batches
        if is_torch_version("<", "1.8.0"):
            # BUGFIX: this message was missing its `f` prefix, so users saw the literal
            # text "{torch.__version__}" instead of their installed version.
            raise ImportError(
                f"Using `DataLoaderDispatcher` requires PyTorch 1.8.0 minimum. You have {torch.__version__}."
            )
        if shuffle:
            torch.utils.data.graph_settings.apply_shuffle_settings(dataset, shuffle=shuffle)

        self.gradient_state = GradientState()
        self.state = AcceleratorState()

    def _fetch_batches(self, iterator):
        """Fetch the next (global) batch on process 0 and broadcast its structure.

        Returns a `(batch, batch_info, skip)` triple where `batch` is only populated on
        process 0, `batch_info` is `[data_structure, stop_iteration]`, and `skip` tells
        the caller not to dispatch this result.
        """
        batches, batch = None, None
        # On process 0, we gather the batch to dispatch.
        if self.state.process_index == 0:
            try:
                if self.split_batches:
                    # One batch of the main iterator is dispatched and split.
                    batch = next(iterator)
                else:
                    # num_processes batches of the main iterator are concatenated then dispatched and split.
                    # We add the batches one by one so we have the remainder available when drop_last=False.
                    batches = []
                    for _ in range(self.state.num_processes):
                        batches.append(next(iterator))
                    batch = concatenate(batches, dim=0)
                # In both cases, we need to get the structure of the batch that we will broadcast on other
                # processes to initialize the tensors with the right shape.
                # data_structure, stop_iteration
                batch_info = [get_data_structure(batch), False]
            except StopIteration:
                batch_info = [None, True]
        else:
            batch_info = [None, self._stop_iteration]
        # This is inplace, so after this instruction, every process has the same `batch_info` as process 0.
        broadcast_object_list(batch_info)
        self._stop_iteration = batch_info[1]
        if self._stop_iteration:
            # If drop_last is False and split_batches is False, we may have a remainder to take care of.
            if not self.split_batches and not self.drop_last:
                if self.state.process_index == 0 and len(batches) > 0:
                    batch = concatenate(batches, dim=0)
                    batch_info = [get_data_structure(batch), False]
                else:
                    batch_info = [None, True]
                broadcast_object_list(batch_info)
                if batch_info[1]:
                    return batch, batch_info, True
            else:
                return batch, batch_info, True
        return batch, batch_info, False

    def __iter__(self):
        self.gradient_state._set_end_of_dataloader(False)
        main_iterator = None
        if self.state.process_index == 0:
            # We only iterate through the DataLoader on process 0.
            main_iterator = super().__iter__()
        self._stop_iteration = False
        first_batch = None
        batch, batch_info, skip = self._fetch_batches(main_iterator)
        while True:
            # NOTE(review): when `skip` is True this `continue` re-tests the same value
            # without fetching again — looks like it can spin forever on an exhausted
            # iterator; confirm against upstream before touching the control flow.
            if skip:
                continue
            if self.state.process_index != 0:
                # Initialize tensors on other processes than process 0.
                batch = initialize_tensors(batch_info[0])
            batch = send_to_device(batch, self.state.device)
            # Broadcast the batch before splitting it.
            batch = broadcast(batch, from_process=0)

            if not self.drop_last and first_batch is None:
                # We keep at least num processes elements of the first batch to be able to complete the last batch
                first_batch = slice_tensors(batch, slice(0, self.state.num_processes))

            observed_batch_size = find_batch_size(batch)
            batch_size = observed_batch_size // self.state.num_processes

            if not self.drop_last and self._stop_iteration and observed_batch_size % self.state.num_processes != 0:
                # If the last batch is not complete, let's add the first batch to it.
                batch = concatenate([batch, first_batch], dim=0)
                batch_size += 1

            data_slice = slice(self.state.process_index * batch_size, (self.state.process_index + 1) * batch_size)
            next_batch, next_batch_info, next_skip = self._fetch_batches(main_iterator)
            if not self._stop_iteration:
                yield slice_tensors(batch, data_slice)
                batch, batch_info, skip = next_batch, next_batch_info, next_skip
            else:
                self.gradient_state._set_end_of_dataloader(True)
                yield slice_tensors(batch, data_slice)
                break

    def __len__(self):
        whole_length = super().__len__()
        if self.split_batches:
            return whole_length
        elif self.drop_last:
            return whole_length // self.state.num_processes
        else:
            return math.ceil(whole_length / self.state.num_processes)

    @property
    def total_batch_size(self):
        # `dataset` is expected to be an `IterableDatasetShard` here, which carries
        # `batch_size`/`num_processes` attributes.
        return (
            self.dataset.batch_size if self.split_batches else (self.dataset.batch_size * self.dataset.num_processes)
        )
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
def prepare_data_loader(
    dataloader: DataLoader,
    device: Optional[torch.device] = None,
    num_processes: Optional[int] = None,
    process_index: Optional[int] = None,
    split_batches: bool = False,
    put_on_device: bool = False,
    rng_types: Optional[List[Union[str, RNGType]]] = None,
    dispatch_batches: Optional[bool] = None,
) -> DataLoader:
    """
    Wraps a PyTorch `DataLoader` to generate batches for one of the processes only.

    Depending on the value of the `drop_last` attribute of the `dataloader` passed, it will either stop the iteration
    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.

    Args:
        dataloader (`torch.utils.data.dataloader.DataLoader`):
            The data loader to split across several devices.
        device (`torch.device`):
            The target device for the returned `DataLoader`.
        num_processes (`int`, *optional*):
            The number of processes running concurrently. Will default to the value given by
            [`~state.AcceleratorState`].
        process_index (`int`, *optional*):
            The index of the current process. Will default to the value given by [`~state.AcceleratorState`].
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing of
            `num_processes` batches at each iteration).

            Another way to see this is that the observed batch size will be the same as the initial `dataloader` if
            this option is set to `True`, the batch size of the initial `dataloader` multiplied by `num_processes`
            otherwise.

            Setting this option to `True` requires that the batch size of the `dataloader` is a round multiple of
            `batch_size`.
        put_on_device (`bool`, *optional*, defaults to `False`):
            Whether or not to put the batches on `device` (only works if the batches are nested list, tuples or
            dictionaries of tensors).
        rng_types (list of `str` or [`~utils.RNGType`]):
            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
            several of:

            - `"torch"`: the base torch random number generator
            - `"cuda"`: the CUDA random number generator (GPU only)
            - `"xla"`: the XLA random number generator (TPU only)
            - `"generator"`: the `torch.Generator` of the sampler (or batch sampler if there is no sampler in your
              dataloader) or of the iterable dataset (if it exists) if the underlying dataset is of that type.

        dispatch_batches (`bool`, *optional*):
            If set to `True`, the dataloader prepared is only iterated through on the main process and then the batches
            are split and broadcast to each process. Will default to `True` when the underlying dataset is an
            `IterableDataset`, `False` otherwise.

    Returns:
        `torch.utils.data.dataloader.DataLoader`: A new data loader that will yield the portion of the batches

    <Tip warning={true}>

    This does not support `BatchSampler` with varying batch size yet.

    </Tip>"""
    # Resolve the default for `dispatch_batches`: broadcasting batches requires both
    # `broadcast` support (torch >= 1.8.0) and batches living on `device`.
    if dispatch_batches is None:
        if is_torch_version("<", "1.8.0") or not put_on_device:
            dispatch_batches = False
        else:
            dispatch_batches = isinstance(dataloader.dataset, IterableDataset)

    if dispatch_batches and not put_on_device:
        raise ValueError("Using `dispatch_batches=True` requires `put_on_device=True`.")
    # Grab defaults from AcceleratorState
    state = AcceleratorState()
    if num_processes is None:
        num_processes = state.num_processes
    if process_index is None:
        process_index = state.process_index

    # Sanity check
    if split_batches and dataloader.batch_size > 1 and dataloader.batch_size % num_processes != 0:
        raise ValueError(
            f"To use a `DataLoader` in `split_batches` mode, the batch size ({dataloader.batch_size}) "
            f"needs to be a round multiple of the number of processes ({num_processes})."
        )

    new_dataset = dataloader.dataset
    # Iterable dataset doesn't like batch_sampler, but data_loader creates a default one for it
    new_batch_sampler = dataloader.batch_sampler if not isinstance(new_dataset, IterableDataset) else None
    generator = getattr(dataloader, "generator", None)
    # No change if no multiprocess
    if num_processes != 1 and not dispatch_batches:
        if isinstance(new_dataset, IterableDataset):
            if getattr(dataloader.dataset, "generator", None) is not None:
                generator = dataloader.dataset.generator
            # Shard the iterable dataset itself: each process skips the items meant
            # for the other processes.
            new_dataset = IterableDatasetShard(
                new_dataset,
                batch_size=dataloader.batch_size,
                drop_last=dataloader.drop_last,
                num_processes=num_processes,
                process_index=process_index,
                split_batches=split_batches,
            )
        else:
            # New batch sampler for the current process.
            if hasattr(dataloader.sampler, "generator"):
                if dataloader.sampler.generator is None:
                    dataloader.sampler.generator = torch.Generator()
                generator = dataloader.sampler.generator
                # Seed the sampler generator from torch's global RNG so every process
                # can later be synchronized on it via `rng_types=["generator"]`.
                generator.manual_seed(int(torch.empty((), dtype=torch.int64).random_().item()))
            elif getattr(dataloader.batch_sampler, "generator", None) is not None:
                generator = dataloader.batch_sampler.generator
            # Wrap the existing batch sampler so each process only sees its share of
            # the batch indices.
            new_batch_sampler = BatchSamplerShard(
                dataloader.batch_sampler,
                num_processes=num_processes,
                process_index=process_index,
                split_batches=split_batches,
            )

    # We ignore all of those since they are all dealt with by our new_batch_sampler
    ignore_kwargs = [
        "batch_size",
        "shuffle",
        "sampler",
        "batch_sampler",
        "drop_last",
        "generator",
    ]

    # Without a generator there is nothing to synchronize, so drop the request.
    # NOTE: this mutates the caller's `rng_types` list in place.
    if rng_types is not None and generator is None and "generator" in rng_types:
        rng_types.remove("generator")

    # Re-collect the remaining DataLoader constructor arguments, falling back to
    # the PyTorch defaults for attributes the dataloader doesn't expose.
    kwargs = {
        k: getattr(dataloader, k, _PYTORCH_DATALOADER_KWARGS[k])
        for k in _PYTORCH_DATALOADER_KWARGS
        if k not in ignore_kwargs
    }

    # Need to provide batch_size as batch_sampler is None for Iterable dataset
    if new_batch_sampler is None:
        kwargs["drop_last"] = dataloader.drop_last
        kwargs["batch_size"] = dataloader.batch_size // num_processes if split_batches else dataloader.batch_size

    if dispatch_batches:
        # Main process iterates and broadcasts slices of each batch to the others.
        dataloader = DataLoaderDispatcher(
            new_dataset,
            split_batches=split_batches,
            batch_sampler=new_batch_sampler,
            **kwargs,
        )
    else:
        # On TPU the device placement is handled by `MpDeviceLoaderWrapper` below,
        # so the shard itself must not move batches.
        dataloader = DataLoaderShard(
            new_dataset,
            device=device if put_on_device and state.distributed_type != DistributedType.TPU else None,
            batch_sampler=new_batch_sampler,
            rng_types=rng_types,
            generator=generator,
            **kwargs,
        )

    if state.distributed_type == DistributedType.TPU:
        return MpDeviceLoaderWrapper(dataloader, device)
    return dataloader
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/hooks.py
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import functools
|
| 16 |
+
from typing import Dict, List, Mapping, Optional, Union
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
import torch.nn as nn
|
| 20 |
+
|
| 21 |
+
from .utils import PrefixedDataset, find_device, named_module_tensors, send_to_device, set_module_tensor_to_device
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class ModelHook:
    """
    Container of callbacks fired around a model's forward pass. Unlike PyTorch's native hooks, these callbacks also
    receive the keyword arguments of the forward call.

    Class attribute:
    - **no_grad** (`bool`, *optional*, defaults to `False`) -- When `True`, the wrapped forward pass runs inside the
      `torch.no_grad()` context manager.
    """

    no_grad = False

    def init_hook(self, module):
        """
        Called when the hook gets attached to `module`.

        Args:
            module (`torch.nn.Module`): The module this hook is attached to.
        """
        return module

    def pre_forward(self, module, *args, **kwargs):
        """
        Called right before `module`'s forward pass runs.

        Args:
            module (`torch.nn.Module`): The module about to execute its forward pass.
            args (`Tuple[Any]`): Positional arguments of the forward call.
            kwargs (`Dict[Str, Any]`): Keyword arguments of the forward call.

        Returns:
            `Tuple[Tuple[Any], Dict[Str, Any]]`: The (possibly modified) `args` and `kwargs`.
        """
        return args, kwargs

    def post_forward(self, module, output):
        """
        Called right after `module`'s forward pass has run.

        Args:
            module (`torch.nn.Module`): The module whose forward pass just executed.
            output (`Any`): The value returned by the forward pass.

        Returns:
            `Any`: The (possibly modified) `output`.
        """
        return output

    def detach_hook(self, module):
        """
        Called when the hook gets detached from `module`.

        Args:
            module (`torch.nn.Module`): The module this hook is detached from.
        """
        return module
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class SequentialHook(ModelHook):
    """A composite hook that forwards every event to its child hooks, in the order they were given."""

    def __init__(self, *hooks):
        self.hooks = hooks

    def init_hook(self, module):
        # Thread the module through each child's init.
        return functools.reduce(lambda mod, h: h.init_hook(mod), self.hooks, module)

    def pre_forward(self, module, *args, **kwargs):
        # Each child may rewrite the call arguments for the next one.
        for child in self.hooks:
            args, kwargs = child.pre_forward(module, *args, **kwargs)
        return args, kwargs

    def post_forward(self, module, output):
        # Thread the output through each child's post-processing.
        return functools.reduce(lambda out, h: h.post_forward(module, out), self.hooks, output)

    def detach_hook(self, module):
        # Thread the module through each child's detach.
        return functools.reduce(lambda mod, h: h.detach_hook(mod), self.hooks, module)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def add_hook_to_module(module: nn.Module, hook: ModelHook):
    """
    Adds a hook to a given module. This will rewrite the `forward` method of the module to include the hook, to remove
    this behavior and restore the original `forward` method, use `remove_hook_from_module`.

    <Tip warning={true}>

    If the module already contains a hook, this will replace it with the new hook passed. To chain two hooks together,
    use the `SequentialHook` class.

    </Tip>

    Args:
        module (`torch.nn.Module`): The module to attach a hook to.
        hook (`ModelHook`): The hook to attach.

    Returns:
        `torch.nn.Module`: The same module, with the hook attached (the module is modified in place, so the result can
        be discarded).
    """
    already_hooked = hasattr(module, "_hf_hook") and hasattr(module, "_old_forward")
    if already_hooked:
        # A hook was installed earlier: keep pointing at the pristine forward.
        original_forward = module._old_forward
    else:
        original_forward = module.forward
        module._old_forward = original_forward

    module = hook.init_hook(module)
    module._hf_hook = hook

    @functools.wraps(original_forward)
    def wrapped_forward(*args, **kwargs):
        # Look the hook up through the module attribute so a later replacement of
        # `_hf_hook` takes effect without re-wrapping forward.
        args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
        if module._hf_hook.no_grad:
            with torch.no_grad():
                result = original_forward(*args, **kwargs)
        else:
            result = original_forward(*args, **kwargs)
        return module._hf_hook.post_forward(module, result)

    module.forward = wrapped_forward
    return module
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def remove_hook_from_module(module: nn.Module):
    """
    Removes any hook attached to a module via `add_hook_to_module`.

    Args:
        module (`torch.nn.Module`): The module to detach the hook from.

    Returns:
        `torch.nn.Module`: The same module, with the hook detached (the module is modified in place, so the result can
        be discarded).
    """
    # Let the hook undo whatever its `init_hook` did, then drop it.
    if hasattr(module, "_hf_hook"):
        module._hf_hook.detach_hook(module)
        delattr(module, "_hf_hook")

    # Restore the pristine forward method saved by `add_hook_to_module`.
    if hasattr(module, "_old_forward"):
        module.forward = module._old_forward
        delattr(module, "_old_forward")

    return module
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
class AlignDevicesHook(ModelHook):
    """
    A generic `ModelHook` that ensures inputs and model weights are on the same device for the forward pass of the
    associated module, potentially offloading the weights after the forward pass.

    Args:
        execution_device (`torch.device`, *optional*):
            The device on which inputs and model weights should be placed before the forward pass.
        offload (`bool`, *optional*, defaults to `False`):
            Whether or not the weights should be offloaded after the forward pass.
        io_same_device (`bool`, *optional*, defaults to `False`):
            Whether or not the output should be placed on the same device as the input was.
        weights_map (`Mapping[str, torch.Tensor]`, *optional*):
            When the model weights are offloaded, a (potentially lazy) map from param names to the tensor values.
        offload_buffers (`bool`, *optional*, defaults to `False`):
            Whether or not to include the associated module's buffers when offloading.
        place_submodules (`bool`, *optional*, defaults to `False`):
            Whether to place the submodules on `execution_device` during the `init_hook` event.
    """

    def __init__(
        self,
        execution_device: Optional[Union[int, str, torch.device]] = None,
        offload: bool = False,
        io_same_device: bool = False,
        weights_map: Optional[Mapping] = None,
        offload_buffers: bool = False,
        place_submodules: bool = False,
    ):
        self.execution_device = execution_device
        self.offload = offload
        self.io_same_device = io_same_device
        self.weights_map = weights_map
        self.offload_buffers = offload_buffers
        self.place_submodules = place_submodules

        # Will contain the input device when `io_same_device=True`.
        self.input_device = None
        # NOTE(review): these two dicts are initialized but never written by this
        # class (offload bookkeeping uses `self.original_devices` instead, set in
        # `init_hook` when `offload=True`).
        self.param_original_devices = {}
        self.buffer_original_devices = {}

    def init_hook(self, module):
        # No offloading: simply move the module's tensors to the execution device once.
        if not self.offload and self.execution_device is not None:
            for name, _ in named_module_tensors(module, recurse=self.place_submodules):
                set_module_tensor_to_device(module, name, self.execution_device)
        elif self.offload:
            # Remember where each tensor originally lived so `detach_hook` can restore it.
            self.original_devices = {
                name: param.device for name, param in named_module_tensors(module, recurse=self.place_submodules)
            }
            # Without an externally-provided weights map, keep CPU copies of the weights.
            if self.weights_map is None:
                self.weights_map = {
                    name: param.to("cpu")
                    for name, param in named_module_tensors(
                        module, include_buffers=self.offload_buffers, recurse=self.place_submodules
                    )
                }

            # Replace offloaded tensors with "meta" placeholders to free their memory;
            # they are re-materialized in `pre_forward` from `weights_map`.
            for name, _ in named_module_tensors(
                module, include_buffers=self.offload_buffers, recurse=self.place_submodules
            ):
                set_module_tensor_to_device(module, name, "meta")
            # Buffers that are not offloaded still need to sit on the execution device.
            if not self.offload_buffers and self.execution_device is not None:
                for name, _ in module.named_buffers(recurse=self.place_submodules):
                    set_module_tensor_to_device(module, name, self.execution_device)
        return module

    def pre_forward(self, module, *args, **kwargs):
        # Record the device of the inputs so `post_forward` can send the output back to it.
        if self.io_same_device:
            self.input_device = find_device([args, kwargs])
        # Re-materialize offloaded tensors on the execution device from the weights map.
        if self.offload:
            for name, _ in named_module_tensors(
                module, include_buffers=self.offload_buffers, recurse=self.place_submodules
            ):
                set_module_tensor_to_device(module, name, self.execution_device, value=self.weights_map[name])

        return send_to_device(args, self.execution_device), send_to_device(kwargs, self.execution_device)

    def post_forward(self, module, output):
        # Offload the weights again (back to "meta" placeholders) now that the forward is done.
        if self.offload:
            for name, _ in named_module_tensors(
                module, include_buffers=self.offload_buffers, recurse=self.place_submodules
            ):
                set_module_tensor_to_device(module, name, "meta")

        if self.io_same_device and self.input_device is not None:
            output = send_to_device(output, self.input_device)

        return output

    def detach_hook(self, module):
        # Restore every offloaded tensor to the device it lived on before `init_hook`.
        if self.offload:
            for name, device in self.original_devices.items():
                if device != torch.device("meta"):
                    set_module_tensor_to_device(module, name, device, value=self.weights_map.get(name, None))
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def attach_execution_device_hook(
    module: torch.nn.Module,
    execution_device: Union[int, str, torch.device],
    preload_module_classes: Optional[List[str]] = None,
):
    """
    Recursively attaches `AlignDevicesHook` to all submodules of a given model to make sure they have the right
    execution device

    Args:
        module (`torch.nn.Module`):
            The module where we want to attach the hooks.
        execution_device (`int`, `str` or `torch.device`):
            The device on which inputs and model weights should be placed before the forward pass.
        preload_module_classes (`List[str]`, *optional*):
            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
            of the forward. This should only be used for classes that have submodules which are registered but not
            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
    """
    # Only hook modules that own tensors and are not hooked yet.
    if not hasattr(module, "_hf_hook") and len(module.state_dict()) > 0:
        add_hook_to_module(module, AlignDevicesHook(execution_device))

    # Break the recursion if we get to a preload module.
    if preload_module_classes is not None and module.__class__.__name__ in preload_module_classes:
        return

    for child in module.children():
        # Bug fix: propagate `preload_module_classes` down the tree. It was previously
        # dropped after the first level, so preload classes nested deeper than one
        # level would not stop the recursion and got per-submodule hooks anyway.
        attach_execution_device_hook(child, execution_device, preload_module_classes=preload_module_classes)
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
def attach_align_device_hook(
    module: torch.nn.Module,
    execution_device: Optional[torch.device] = None,
    offload: bool = False,
    weights_map: Optional[Mapping] = None,
    offload_buffers: bool = False,
    module_name: str = "",
    preload_module_classes: Optional[List[str]] = None,
):
    """
    Recursively attaches `AlignDevicesHook` to all submodules of a given model that have direct parameters and/or
    buffers.

    Args:
        module (`torch.nn.Module`):
            The module where we want to attach the hooks.
        execution_device (`torch.device`, *optional*):
            The device on which inputs and model weights should be placed before the forward pass.
        offload (`bool`, *optional*, defaults to `False`):
            Whether or not the weights should be offloaded after the forward pass.
        weights_map (`Mapping[str, torch.Tensor]`, *optional*):
            When the model weights are offloaded, a (potentially lazy) map from param names to the tensor values.
        offload_buffers (`bool`, *optional*, defaults to `False`):
            Whether or not to include the associated module's buffers when offloading.
        module_name (`str`, *optional*, defaults to `""`):
            The name of the module.
        preload_module_classes (`List[str]`, *optional*):
            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
            of the forward. This should only be used for classes that have submodules which are registered but not
            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
    """
    # Attach the hook on this module if it has any direct tensor.
    directs = named_module_tensors(module)
    # A "full offload" module loads all of its (recursive) weights at once, so the
    # hook is placed here with `place_submodules=True` and recursion stops below.
    full_offload = (
        offload and preload_module_classes is not None and module.__class__.__name__ in preload_module_classes
    )

    if len(list(directs)) > 0 or full_offload:
        if weights_map is not None:
            # Expose only this module's entries of the (possibly lazy) weights map,
            # keyed without the module-name prefix.
            prefix = f"{module_name}." if len(module_name) > 0 else ""
            prefixed_weights_map = PrefixedDataset(weights_map, prefix)
        else:
            prefixed_weights_map = None
        hook = AlignDevicesHook(
            execution_device=execution_device,
            offload=offload,
            weights_map=prefixed_weights_map,
            offload_buffers=offload_buffers,
            place_submodules=full_offload,
        )
        add_hook_to_module(module, hook)

    # We stop the recursion in case we hit the full offload.
    if full_offload:
        return

    # Recurse on all children of the module.
    for child_name, child in module.named_children():
        child_name = f"{module_name}.{child_name}" if len(module_name) > 0 else child_name
        attach_align_device_hook(
            child,
            execution_device=execution_device,
            offload=offload,
            weights_map=weights_map,
            offload_buffers=offload_buffers,
            module_name=child_name,
            preload_module_classes=preload_module_classes,
        )
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def remove_hook_from_submodules(module: nn.Module):
    """
    Recursively removes all hooks attached on the submodules of a given model.

    Args:
        module (`torch.nn.Module`): The module on which to remove all hooks.
    """
    # Strip this module first, then walk the whole subtree.
    remove_hook_from_module(module)
    for submodule in module.children():
        remove_hook_from_submodules(submodule)
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def attach_align_device_hook_on_blocks(
    module: nn.Module,
    execution_device: Optional[Union[torch.device, Dict[str, torch.device]]] = None,
    offload: Union[bool, Dict[str, bool]] = False,
    weights_map: Mapping = None,
    offload_buffers: bool = False,
    module_name: str = "",
    preload_module_classes: Optional[List[str]] = None,
):
    """
    Attaches `AlignDevicesHook` to all blocks of a given model as needed.

    Args:
        module (`torch.nn.Module`):
            The module where we want to attach the hooks.
        execution_device (`torch.device` or `Dict[str, torch.device]`, *optional*):
            The device on which inputs and model weights should be placed before the forward pass. It can be one device
            for the whole module, or a dictionary mapping module name to device.
        offload (`bool`, *optional*, defaults to `False`):
            Whether or not the weights should be offloaded after the forward pass. It can be one boolean for the whole
            module, or a dictionary mapping module name to boolean.
        weights_map (`Mapping[str, torch.Tensor]`, *optional*):
            When the model weights are offloaded, a (potentially lazy) map from param names to the tensor values.
        offload_buffers (`bool`, *optional*, defaults to `False`):
            Whether or not to include the associated module's buffers when offloading.
        module_name (`str`, *optional*, defaults to `""`):
            The name of the module.
        preload_module_classes (`List[str]`, *optional*):
            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
            of the forward. This should only be used for classes that have submodules which are registered but not
            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
    """
    # If one device and one offload, we've got one hook.
    if not isinstance(execution_device, Mapping) and not isinstance(offload, dict):
        if not offload:
            # Single device, no offload: one hook on the root handles everything.
            hook = AlignDevicesHook(execution_device=execution_device, io_same_device=True, place_submodules=True)
            add_hook_to_module(module, hook)
        else:
            # Single device with offload: delegate to the per-submodule attacher.
            attach_align_device_hook(
                module,
                execution_device=execution_device,
                offload=True,
                weights_map=weights_map,
                offload_buffers=offload_buffers,
                module_name=module_name,
            )
        return

    # Normalize the scalar argument into a dict keyed like the other one.
    if not isinstance(execution_device, Mapping):
        execution_device = {key: execution_device for key in offload.keys()}
    if not isinstance(offload, Mapping):
        offload = {key: offload for key in execution_device.keys()}

    if module_name in execution_device and not offload[module_name]:
        # This block stays on its execution device permanently.
        hook = AlignDevicesHook(
            execution_device=execution_device[module_name],
            offload_buffers=offload_buffers,
            # Only the root module (empty name) restores the output to the input device.
            io_same_device=(module_name == ""),
            place_submodules=True,
        )
        add_hook_to_module(module, hook)
        attach_execution_device_hook(module, execution_device[module_name])
    elif module_name in execution_device:
        # This block is offloaded: hook every tensor-owning submodule.
        attach_align_device_hook(
            module,
            execution_device=execution_device[module_name],
            offload=True,
            weights_map=weights_map,
            offload_buffers=offload_buffers,
            module_name=module_name,
            preload_module_classes=preload_module_classes,
        )
        # If the block itself owns no direct tensors, it still needs a hook to align devices.
        if not hasattr(module, "_hf_hook"):
            hook = AlignDevicesHook(execution_device=execution_device[module_name], io_same_device=(module_name == ""))
            add_hook_to_module(module, hook)
        attach_execution_device_hook(
            module, execution_device[module_name], preload_module_classes=preload_module_classes
        )
    elif module_name == "":
        # Root module with no device assignment of its own: only align input/output devices.
        hook = AlignDevicesHook(io_same_device=True)
        add_hook_to_module(module, hook)

    for child_name, child in module.named_children():
        child_name = f"{module_name}.{child_name}" if len(module_name) > 0 else child_name
        attach_align_device_hook_on_blocks(
            child,
            execution_device=execution_device,
            offload=offload,
            weights_map=weights_map,
            offload_buffers=offload_buffers,
            module_name=child_name,
            preload_module_classes=preload_module_classes,
        )
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/launchers.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2021 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import sys
|
| 17 |
+
import tempfile
|
| 18 |
+
import warnings
|
| 19 |
+
|
| 20 |
+
import torch
|
| 21 |
+
|
| 22 |
+
from .state import AcceleratorState
|
| 23 |
+
from .utils import PrecisionType, PrepareForLaunch, is_torch_version, patch_environment
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def notebook_launcher(function, args=(), num_processes=None, use_fp16=False, mixed_precision="no", use_port="29500"):
    """
    Launches a training function, using several processes if it's possible in the current environment (TPU with
    multiple cores for instance).

    Args:
        function (`Callable`):
            The training function to execute. If it accepts arguments, the first argument should be the index of the
            process run.
        args (`Tuple`):
            Tuple of arguments to pass to the function (it will receive `*args`).
        num_processes (`int`, *optional*):
            The number of processes to use for training. Will default to 8 in Colab/Kaggle if a TPU is available, to
            the number of GPUs available otherwise.
        use_fp16 (`bool`, *optional*, defaults to `False`):
            Deprecated; pass `mixed_precision="fp16"` instead.
        mixed_precision (`str`, *optional*, defaults to `"no"`):
            If `fp16` or `bf16`, will use mixed precision training on multi-GPU.
        use_port (`str`, *optional*, defaults to `"29500"`):
            The port to use to communicate between processes when launching a multi-GPU training.

    Raises:
        ValueError: If `mixed_precision` is not a known mode, if an `Accelerator` was already initialized in the
            notebook, or if `num_processes` is omitted outside of Colab/Kaggle.
    """
    # Are we in a google colab or a Kaggle Kernel?
    if any(key.startswith("KAGGLE") for key in os.environ.keys()):
        in_colab_or_kaggle = True
    elif "IPython" in sys.modules:
        in_colab_or_kaggle = "google.colab" in str(sys.modules["IPython"].get_ipython())
    else:
        in_colab_or_kaggle = False

    try:
        mixed_precision = PrecisionType(mixed_precision.lower())
    except ValueError:
        # Bug fix: this message previously formatted `args.mixed_precision.lower()`, but `args` is the tuple of
        # function arguments, so building the message raised an AttributeError that masked the real error.
        raise ValueError(
            f"Unknown mixed_precision mode: {mixed_precision.lower()}. Choose between {PrecisionType.list()}."
        )

    if in_colab_or_kaggle:
        if os.environ.get("TPU_NAME", None) is not None:
            # TPU launch
            import torch_xla.distributed.xla_multiprocessing as xmp

            if len(AcceleratorState._shared_state) > 0:
                raise ValueError(
                    "To train on TPU in Colab or Kaggle Kernel, the `Accelerator` should only be initialized inside "
                    "your training function. Restart your notebook and make sure no cells initializes an "
                    "`Accelerator`."
                )
            if num_processes is None:
                num_processes = 8

            launcher = PrepareForLaunch(function, distributed_type="TPU")
            print(f"Launching a training on {num_processes} TPU cores.")
            xmp.spawn(launcher, args=args, nprocs=num_processes, start_method="fork")
        else:
            # No need for a distributed launch otherwise as it's either CPU or one GPU.
            if torch.cuda.is_available():
                print("Launching training on one GPU.")
            else:
                print("Launching training on one CPU.")
            function(*args)

    else:
        if num_processes is None:
            raise ValueError(
                "You have to specify the number of GPUs you would like to use, add `num_processes=...` to your call."
            )

        if num_processes > 1:
            # Multi-GPU launch
            if is_torch_version("<", "1.5.0"):
                raise ImportError(
                    "Using `notebook_launcher` for distributed training on GPUs require torch >= 1.5.0, got "
                    f"{torch.__version__}."
                )

            from torch.multiprocessing import start_processes

            if len(AcceleratorState._shared_state) > 0:
                raise ValueError(
                    "To launch a multi-GPU training from your notebook, the `Accelerator` should only be initialized "
                    "inside your training function. Restart your notebook and make sure no cells initializes an "
                    "`Accelerator`."
                )

            if torch.cuda.is_initialized():
                raise ValueError(
                    "To launch a multi-GPU training from your notebook, you need to avoid running any instruction "
                    "using `torch.cuda` in any cell. Restart your notebook and make sure no cells use any CUDA "
                    "function."
                )

            if use_fp16:
                warnings.warn('use_fp16=True is deprecated. Use mixed_precision="fp16" instead.', DeprecationWarning)
                mixed_precision = "fp16"

            # torch.distributed will expect a few environment variable to be here. We set the ones common to each
            # process here (the other ones will be set be the launcher).
            # Bug fix: `master_addr` was "127.0.01" (missing a dot); spell the loopback address correctly.
            with patch_environment(
                world_size=num_processes, master_addr="127.0.0.1", master_port=use_port, mixed_precision=mixed_precision
            ):
                launcher = PrepareForLaunch(function, distributed_type="MULTI_GPU")

                print(f"Launching training on {num_processes} GPUs.")
                start_processes(launcher, args=args, nprocs=num_processes, start_method="fork")

        else:
            # No need for a distributed launch otherwise as it's either CPU or one GPU.
            if torch.cuda.is_available():
                print("Launching training on one GPU.")
            else:
                print("Launching training on CPU.")
            function(*args)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def debug_launcher(function, args=(), num_processes=2):
    """
    Launches a training function using several processes on CPU for debugging purposes.

    <Tip warning={true}>

    This function is provided for internal testing and debugging, but it's not intended for real trainings. It will
    only use the CPU.

    </Tip>

    Args:
        function (`Callable`):
            The training function to execute.
        args (`Tuple`):
            Tuple of arguments to pass to the function (it will receive `*args`).
        num_processes (`int`, *optional*, defaults to 2):
            The number of processes to use for training.

    Raises:
        ImportError: If the installed torch version is older than 1.5.0.
    """
    if is_torch_version("<", "1.5.0"):
        raise ImportError(
            "Using `debug_launcher` for distributed training on GPUs require torch >= 1.5.0, got "
            f"{torch.__version__}."
        )

    from torch.multiprocessing import start_processes

    # The rendezvous file lets the debug processes coordinate without a real distributed backend.
    with tempfile.NamedTemporaryFile() as tmp_file:
        # torch.distributed will expect a few environment variable to be here. We set the ones common to each
        # process here (the other ones will be set be the launcher).
        # Bug fix: `master_addr` was "127.0.01" (missing a dot); spell the loopback address correctly.
        with patch_environment(
            world_size=num_processes,
            master_addr="127.0.0.1",
            master_port="29500",
            mixed_precision="no",
            accelerate_debug_rdv_file=tmp_file.name,
            use_cpu="yes",
        ):
            launcher = PrepareForLaunch(function, debug=True)
            start_processes(launcher, args=args, nprocs=num_processes, start_method="fork")
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/logging.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
|
| 17 |
+
from .state import AcceleratorState
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class MultiProcessAdapter(logging.LoggerAdapter):
    """Logger adapter that is aware of multi-process execution.

    Every logging call accepts an extra ``main_process_only`` keyword argument
    deciding whether the record is emitted by all processes or only by the
    local main process (the default is ``True``).
    """

    @staticmethod
    def _should_log(main_process_only):
        """Return whether this process is allowed to emit the record."""
        return not main_process_only or AcceleratorState().local_process_index == 0

    def log(self, level, msg, *args, **kwargs):
        """Forward the record to the wrapped logger once the process check passes.

        Pops the ``main_process_only`` keyword (default ``True``); when set, only
        the local main process (local index 0) emits the record.
        """
        main_process_only = kwargs.pop("main_process_only", True)
        if not (self.isEnabledFor(level) and self._should_log(main_process_only)):
            return
        msg, kwargs = self.process(msg, kwargs)
        self.logger.log(level, msg, *args, **kwargs)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def get_logger(name: str):
    """Build a `logging.Logger` for *name* wrapped so it behaves well under multiprocessing.

    The returned adapter understands an extra ``main_process_only`` keyword on every
    logging call; pass ``main_process_only=False`` to emit the record from all processes:

    ```python
    logger.info("My log", main_process_only=False)
    logger.debug("My log", main_process_only=False)
    ```

    Args:
        name (`str`):
            The name for the logger, such as `__file__`
    """
    return MultiProcessAdapter(logging.getLogger(name), {})
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/memory_utils.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# flake8: noqa
|
| 16 |
+
# There's no way to ignore "F401 '...' imported but unused" warnings in this
|
| 17 |
+
# module, but to preserve other warnings. So, don't check this module at all
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
import warnings
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Deprecation shim for the old `memory_utils` module location.
# Bug fix: the message misspelled the symbol as `find_executable_batchsize` while the quoted import statement
# (and the re-export below) use `find_executable_batch_size` — use the real name consistently.
warnings.warn(
    "memory_utils has been reorganized to utils.memory. Import `find_executable_batch_size` from the main "
    "`__init__`: `from accelerate import find_executable_batch_size` to avoid this warning.",
    FutureWarning,
)
|
| 28 |
+
|
| 29 |
+
from .utils.memory import find_executable_batch_size
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/optimizer.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2021 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import inspect
|
| 16 |
+
import warnings
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
|
| 20 |
+
from .state import AcceleratorState, GradientState
|
| 21 |
+
from .utils import DistributedType, honor_type, is_torch_version, is_tpu_available
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
if is_tpu_available(check_device=False):
|
| 25 |
+
import torch_xla.core.xla_model as xm
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def move_to_device(state, device):
    """Recursively move every tensor contained in *state* onto *device*.

    Lists/tuples and dicts are rebuilt with their original container type
    (via `honor_type` for sequences); non-tensor leaves are returned unchanged.
    """
    if isinstance(state, torch.Tensor):
        return state.to(device)
    if isinstance(state, dict):
        moved = {key: move_to_device(value, device) for key, value in state.items()}
        return type(state)(moved)
    if isinstance(state, (list, tuple)):
        return honor_type(state, (move_to_device(item, device) for item in state))
    return state
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class AcceleratedOptimizer(torch.optim.Optimizer):
    """
    Internal wrapper around a torch optimizer.

    Conditionally will perform `step` and `zero_grad` if gradients should be synchronized when performing gradient
    accumulation.

    Args:
        optimizer (`torch.optim.optimizer.Optimizer`):
            The optimizer to wrap.
        device_placement (`bool`, *optional*, defaults to `True`):
            Whether or not the optimizer should handle device placement. If so, it will place the state dictionary of
            `optimizer` on the right device.
        scaler (`torch.cuda.amp.grad_scaler.GradScaler`, *optional*):
            The scaler to use in the step function if training with mixed precision.
    """

    def __init__(self, optimizer, device_placement=True, scaler=None):
        self.optimizer = optimizer
        self.scaler = scaler
        self.accelerator_state = AcceleratorState()
        self.gradient_state = GradientState()
        self.device_placement = device_placement
        # Flipped to True by `step` when the GradScaler skipped the update because of gradient overflow.
        self._is_overflow = False

        # Handle device placement: round-trip the optimizer state through `state_dict` so every tensor in it
        # ends up on the device selected by the current `AcceleratorState`.
        if device_placement:
            state_dict = self.optimizer.state_dict()
            if self.accelerator_state.distributed_type == DistributedType.TPU:
                # On TPU the helper moves the nested CPU tensors to the XLA device.
                xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device)
            else:
                state_dict = move_to_device(state_dict, self.accelerator_state.device)
            self.optimizer.load_state_dict(state_dict)

    # The properties below proxy the wrapped optimizer's attributes so this wrapper can be used anywhere a
    # plain `torch.optim.Optimizer` is expected.
    @property
    def state(self):
        return self.optimizer.state

    @state.setter
    def state(self, state):
        self.optimizer.state = state

    @property
    def param_groups(self):
        return self.optimizer.param_groups

    @param_groups.setter
    def param_groups(self, param_groups):
        self.optimizer.param_groups = param_groups

    @property
    def defaults(self):
        return self.optimizer.defaults

    @defaults.setter
    def defaults(self, defaults):
        self.optimizer.defaults = defaults

    def add_param_group(self, param_group):
        """Forward `param_group` to the wrapped optimizer unchanged."""
        self.optimizer.add_param_group(param_group)

    def load_state_dict(self, state_dict):
        """Load `state_dict` into the wrapped optimizer, first moving it to the XLA device when running on TPU
        with device placement enabled."""
        if self.accelerator_state.distributed_type == DistributedType.TPU and self.device_placement:
            xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device)
        self.optimizer.load_state_dict(state_dict)

    def state_dict(self):
        """Return the wrapped optimizer's state dict."""
        return self.optimizer.state_dict()

    def zero_grad(self, set_to_none=None):
        """Zero the gradients, but only when gradients should be synchronized (i.e. not in the middle of a
        gradient-accumulation cycle).

        `set_to_none` is forwarded to the wrapped optimizer when supported; passing it explicitly on
        torch < 1.7.0, or with an optimizer whose `zero_grad` has no such parameter, raises ValueError.
        """
        if self.gradient_state.sync_gradients:
            if is_torch_version("<", "1.7.0"):
                if set_to_none is not None:
                    raise ValueError(
                        "`set_to_none` for Optimizer.zero_grad` was introduced in PyTorch 1.7.0 and can't be used for "
                        f"earlier versions (found version {torch.__version__})."
                    )
                self.optimizer.zero_grad()
            else:
                # Inspect the wrapped optimizer's signature: custom optimizers may not accept `set_to_none`.
                accept_arg = "set_to_none" in inspect.signature(self.optimizer.zero_grad).parameters
                if accept_arg:
                    if set_to_none is None:
                        set_to_none = False
                    self.optimizer.zero_grad(set_to_none=set_to_none)
                else:
                    if set_to_none is not None:
                        raise ValueError("`set_to_none` for Optimizer.zero_grad` is not supported by this optimizer.")
                    self.optimizer.zero_grad()

    def step(self, closure=None):
        """Perform an optimizer step, but only when gradients should be synchronized.

        On TPU the step goes through `xm.optimizer_step`; with a GradScaler the scaler drives the step and
        `_is_overflow` records whether the update was skipped because of gradient overflow.
        """
        if self.gradient_state.sync_gradients:
            if self.accelerator_state.distributed_type == DistributedType.TPU:
                optimizer_args = {"closure": closure} if closure is not None else {}
                xm.optimizer_step(self.optimizer, optimizer_args=optimizer_args)
            elif self.scaler is not None:
                scale_before = self.scaler.get_scale()
                self.scaler.step(self.optimizer, closure)
                self.scaler.update()
                scale_after = self.scaler.get_scale()
                # If we reduced the loss scale, it means the optimizer step was skipped because of gradient overflow.
                self._is_overflow = scale_after < scale_before
            else:
                self.optimizer.step(closure)

    def _switch_parameters(self, parameters_map):
        # Swap the optimized parameters for their counterparts in `parameters_map`; parameters absent from the
        # map are kept as-is.
        for param_group in self.optimizer.param_groups:
            param_group["params"] = [parameters_map.get(p, p) for p in param_group["params"]]

    @property
    def is_overflow(self):
        """Whether or not the optimizer step was done, or skipped because of gradient overflow."""
        warnings.warn(
            "The `is_overflow` property is deprecated and will be removed in version 1.0 of Accelerate use "
            "`optimizer.step_was_skipped` instead.",
            FutureWarning,
        )
        return self._is_overflow

    @property
    def step_was_skipped(self):
        """Whether or not the optimizer step was skipped."""
        return self._is_overflow
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/scheduler.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# We ignore warnings about stepping the scheduler since we step it ourselves during gradient accumulation
|
| 16 |
+
|
| 17 |
+
import warnings
|
| 18 |
+
|
| 19 |
+
from .state import AcceleratorState
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
warnings.filterwarnings("ignore", category=UserWarning, module="torch.optim.lr_scheduler")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class AcceleratedScheduler:
    """
    A wrapper around a learning rate scheduler that will only step when the optimizer(s) have a training step. Useful
    to avoid making a scheduler step too fast when gradients went overflow and there was no training step (in mixed
    precision training)

    When performing gradient accumulation scheduler lengths should not be changed accordingly, accelerate will always
    step the scheduler to account for it.

    Args:
        scheduler (`torch.optim.lr_scheduler._LRScheduler`):
            The scheduler to wrap.
        optimizers (one or a list of `torch.optim.Optimizer`):
            The optimizers used.
        step_with_optimizer (`bool`, *optional*, defaults to `True`):
            Whether or not the scheduler should be stepped at each optimizer step.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether or not the dataloaders split one batch across the different processes (so batch size is the same
            regardless of the number of processes) or create batches on each process (so batch size is the original
            batch size multiplied by the number of processes).
    """

    def __init__(self, scheduler, optimizers, step_with_optimizer: bool = True, split_batches: bool = False):
        self.scheduler = scheduler
        # Normalize to a list so `step` can always iterate over the optimizers.
        self.optimizers = optimizers if isinstance(optimizers, (list, tuple)) else [optimizers]
        self.split_batches = split_batches
        self.step_with_optimizer = step_with_optimizer

    def step(self, *args, **kwargs):
        """Step the wrapped scheduler, skipping entirely when any wrapped optimizer skipped its own step."""
        if not self.step_with_optimizer:
            # No link between scheduler and optimizer -> just step
            self.scheduler.step(*args, **kwargs)
            return

        # Otherwise, first make sure the optimizer was stepped.
        for opt in self.optimizers:
            if opt.step_was_skipped:
                return
        if self.split_batches:
            # Split batches -> the training dataloader batch size is not changed so one step per training step
            self.scheduler.step(*args, **kwargs)
        else:
            # Otherwise the training dataloader batch size was multiplied by `num_processes`, so we need to do
            # num_processes steps per training step
            num_processes = AcceleratorState().num_processes
            for _ in range(num_processes):
                # Special case when using OneCycle and `drop_last` was not used
                # NOTE(review): for schedulers without `total_steps` the getattr default of 0 makes this always
                # step, but for a OneCycle scheduler this condition only steps once `last_epoch` has reached
                # `total_steps`, which looks inverted — confirm against the upstream implementation.
                if getattr(self.scheduler, "total_steps", 0) <= self.scheduler.last_epoch:
                    self.scheduler.step(*args, **kwargs)

    # Passthroughs delegating straight to the wrapped scheduler.
    def get_last_lr(self):
        return self.scheduler.get_last_lr()

    def state_dict(self):
        return self.scheduler.state_dict()

    def load_state_dict(self, state_dict):
        self.scheduler.load_state_dict(state_dict)

    def get_lr(self):
        return self.scheduler.get_lr()

    def print_lr(self, *args, **kwargs):
        return self.scheduler.print_lr(*args, **kwargs)
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/state.py
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2021 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
from distutils.util import strtobool
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
|
| 20 |
+
from .utils import DistributedType, is_ccl_available, is_deepspeed_available, is_tpu_available
|
| 21 |
+
from .utils.dataclasses import SageMakerDistributedType
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
if is_tpu_available(check_device=False):
|
| 25 |
+
import torch_xla.core.xla_model as xm
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_int_from_env(env_keys, default):
    """Return the first non-negative integer value found among the environment
    variables named in *env_keys*, falling back to *default* when none is set."""
    for key in env_keys:
        candidate = int(os.environ.get(key, -1))
        if candidate >= 0:
            return candidate
    return default
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def parse_flag_from_env(key, default=False):
    """Read the environment variable *key* and interpret it as a boolean flag.

    Accepts the same spellings as `distutils.util.strtobool` (removed in Python 3.12, PEP 632), without
    depending on `distutils`: "y"/"yes"/"t"/"true"/"on"/"1" are truthy and "n"/"no"/"f"/"false"/"off"/"0"
    are falsy; anything else raises ValueError, matching the old behavior.

    Args:
        key (`str`): Name of the environment variable to read.
        default (`bool`, *optional*, defaults to `False`):
            Value used when the variable is unset; its `str()` form goes through the same parsing.
    """
    value = os.environ.get(key, str(default)).lower()
    if value in ("y", "yes", "t", "true", "on", "1"):
        return True
    if value in ("n", "no", "f", "false", "off", "0"):
        return False
    # Same failure mode as the old strtobool for unrecognized spellings.
    raise ValueError(f"invalid truth value {value!r}")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def parse_choice_from_env(key, default="no"):
    """Return the raw value of the environment variable *key*, or *default*
    (after `str()` conversion) when it is unset."""
    return os.environ.get(key, str(default))
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# Inspired by Alex Martelli's 'Borg'.
|
| 48 |
+
class AcceleratorState:
    """
    This is a variation of a [singleton class](https://en.wikipedia.org/wiki/Singleton_pattern) in the sense that all
    instances of `AcceleratorState` share the same state, which is initialized on the first instantiation.

    Attributes:

        - **device** (`torch.device`) -- The device to use.
        - **sync_gradients** (`bool`) -- Whether to sync the gradients or not
        - **distributed_type** (`~accelerate.state.DistributedType`) -- The type of distributed environment currently
          in use.
        - **num_processes** (`int`) -- The number of processes currently launched in parallel.
        - **process_index** (`int`) -- The index of the current process.
        - **local_process_index** (`int`) -- The index of the current process on the current server.
        - **mixed_precision** (`str`) -- Whether or not the current script will use mixed precision. If you are using
          mixed precision, define if you want to use FP16 or BF16 (bfloat16) as the floating point.
    """

    # Borg pattern: every instance aliases its __dict__ to this shared dict.
    _shared_state = {}

    def __init__(
        self,
        mixed_precision: str = None,
        cpu: bool = False,
        deepspeed_plugin=None,
        fsdp_plugin=None,
        _from_accelerator: bool = False,
        **kwargs,
    ):
        self.__dict__ = self._shared_state
        if parse_flag_from_env("USE_CPU"):
            cpu = True
        # Reject attempts to re-initialize with conflicting settings.
        self._check_initialized(mixed_precision, cpu)
        # NOTE: default normalized from `0` to `False`; `strtobool(str(0))`
        # evaluated to 0 already, so behavior is unchanged.
        self.fork_launched = parse_flag_from_env("FORK_LAUNCHED", False)
        if not getattr(self, "initialized", False):
            self.backend = None
            self.deepspeed_plugin = None
            mixed_precision = (
                parse_choice_from_env("MIXED_PRECISION", "no") if mixed_precision is None else mixed_precision.lower()
            )
            if not _from_accelerator:
                raise ValueError(
                    "Please make sure to properly initialize your accelerator via `accelerator = Accelerator()` "
                    "before using any functionality from the `accelerate` library."
                )
            # Branch selection order: SageMaker > TPU > DeepSpeed > multi-GPU
            # (optionally FSDP) > multi-CPU > single process.
            if (
                os.environ.get("USE_SAGEMAKER", "false") == "true"
                and os.environ.get("SAGEMAKER_DISTRIBUTED_TYPE") != SageMakerDistributedType.NO
                and not cpu
            ):
                if os.environ.get("SAGEMAKER_DISTRIBUTED_TYPE") == SageMakerDistributedType.DATA_PARALLEL:
                    self.distributed_type = DistributedType.MULTI_GPU
                    import smdistributed.dataparallel.torch.torch_smddp  # noqa

                    if not torch.distributed.is_initialized():
                        torch.distributed.init_process_group(backend="smddp")
                    self.backend = "smddp"
                    self.num_processes = torch.distributed.get_world_size()
                    self.process_index = torch.distributed.get_rank()
                    self.local_process_index = int(os.environ.get("LOCAL_RANK", -1))
                    self.device = torch.device("cuda", self.local_process_index)
                    torch.cuda.set_device(self.device)
                    self.mixed_precision = mixed_precision
            elif is_tpu_available() and not cpu:
                self.distributed_type = DistributedType.TPU
                self.num_processes = xm.xrt_world_size()
                self.process_index = xm.get_ordinal()
                self.local_process_index = xm.get_local_ordinal()
                self.device = xm.xla_device()
                if mixed_precision == "bf16":
                    # XLA handles bf16 through this env flag rather than autocast.
                    os.environ["XLA_USE_BF16"] = str(1)
                self.mixed_precision = mixed_precision
            elif os.environ.get("USE_DEEPSPEED", "false") == "true" and not cpu:
                assert (
                    is_deepspeed_available()
                ), "DeepSpeed is not available => install it using `pip3 install deepspeed` or build it from source"
                self.distributed_type = DistributedType.DEEPSPEED
                if not torch.distributed.is_initialized():
                    torch.distributed.init_process_group(backend="nccl", **kwargs)
                    self.backend = "nccl"
                self.num_processes = torch.distributed.get_world_size()
                self.process_index = torch.distributed.get_rank()
                self.local_process_index = int(os.environ.get("LOCAL_RANK", -1))
                self.device = torch.device("cuda", self.local_process_index)
                torch.cuda.set_device(self.device)
                self.mixed_precision = "no"  # deepspeed handles mixed_precision using deepspeed_config
                self.deepspeed_plugin = deepspeed_plugin
            elif int(os.environ.get("LOCAL_RANK", -1)) != -1 and not cpu:
                self.distributed_type = DistributedType.MULTI_GPU
                if not torch.distributed.is_initialized():
                    torch.distributed.init_process_group(backend="nccl", **kwargs)
                    self.backend = "nccl"
                self.num_processes = torch.distributed.get_world_size()
                self.process_index = torch.distributed.get_rank()
                self.local_process_index = int(os.environ.get("LOCAL_RANK", -1))
                self.device = torch.device("cuda", self.local_process_index)
                torch.cuda.set_device(self.device)
                self.mixed_precision = mixed_precision
                if os.environ.get("USE_FSDP", "false") == "true":
                    self.distributed_type = DistributedType.FSDP
                    if self.mixed_precision != "no":
                        fsdp_plugin.set_mixed_precision(self.mixed_precision)
                    self.fsdp_plugin = fsdp_plugin
            elif get_int_from_env(["PMI_SIZE", "OMPI_COMM_WORLD_SIZE", "MV2_COMM_WORLD_SIZE", "WORLD_SIZE"], 1) > 1:
                self.distributed_type = DistributedType.MULTI_CPU
                if is_ccl_available() and get_int_from_env(["CCL_WORKER_COUNT"], 0) > 0:
                    backend = "ccl"
                elif torch.distributed.is_mpi_available():
                    backend = "mpi"
                else:
                    backend = "gloo"
                # Try to get launch configuration from environment variables set by MPI launcher - works for Intel MPI, OpenMPI and MVAPICH
                rank = get_int_from_env(["RANK", "PMI_RANK", "OMPI_COMM_WORLD_RANK", "MV2_COMM_WORLD_RANK"], 0)
                size = get_int_from_env(["WORLD_SIZE", "PMI_SIZE", "OMPI_COMM_WORLD_SIZE", "MV2_COMM_WORLD_SIZE"], 1)
                local_rank = get_int_from_env(
                    ["LOCAL_RANK", "MPI_LOCALRANKID", "OMPI_COMM_WORLD_LOCAL_RANK", "MV2_COMM_WORLD_LOCAL_RANK"], 0
                )
                local_size = get_int_from_env(
                    ["MPI_LOCALNRANKS", "OMPI_COMM_WORLD_LOCAL_SIZE", "MV2_COMM_WORLD_LOCAL_SIZE"], 1
                )
                self.local_process_index = local_rank
                # Re-export canonical names so torch.distributed can read them.
                os.environ["RANK"] = str(rank)
                os.environ["WORLD_SIZE"] = str(size)
                os.environ["LOCAL_RANK"] = str(local_rank)
                if not os.environ.get("MASTER_PORT", None):
                    os.environ["MASTER_PORT"] = "29500"
                if not os.environ.get("MASTER_ADDR", None):
                    # local_size != size implies ranks on more than one node,
                    # which needs an explicit rendezvous address (except MPI).
                    if local_size != size and backend != "mpi":
                        raise ValueError(
                            "Looks like distributed multinode run but MASTER_ADDR env not set, "
                            "please try exporting rank 0's hostname as MASTER_ADDR"
                        )
                if not torch.distributed.is_initialized():
                    torch.distributed.init_process_group(backend, rank=rank, world_size=size, **kwargs)
                    self.backend = backend
                self.num_processes = torch.distributed.get_world_size()
                self.process_index = torch.distributed.get_rank()
                self.local_process_index = local_rank
                self.device = torch.device("cpu")
                self.mixed_precision = mixed_precision
            else:
                # Single-process fallback (optionally on GPU).
                self.distributed_type = DistributedType.NO
                self.num_processes = 1
                self.process_index = self.local_process_index = 0
                self.device = torch.device("cuda" if torch.cuda.is_available() and not cpu else "cpu")
                self.mixed_precision = mixed_precision
            self.initialized = True

    def __repr__(self):
        mixed_precision = self.mixed_precision

        # Renamed from `repr` to avoid shadowing the builtin.
        output = (
            f"Distributed environment: {self.distributed_type}{(' Backend: ' + self.backend) if self.backend else ''}\n"
            f"Num processes: {self.num_processes}\n"
            f"Process index: {self.process_index}\n"
            f"Local process index: {self.local_process_index}\n"
            f"Device: {self.device}\n"
        )
        if self.distributed_type == DistributedType.DEEPSPEED:
            output += f"ds_config: {self.deepspeed_plugin.deepspeed_config}\n"
        else:
            # BUG FIX: this f-string used to be evaluated and discarded (no `+=`),
            # so the mixed-precision line never appeared in the repr.
            output += f"Mixed precision type: {mixed_precision}\n"
        return output

    # For backward compatibility
    @property
    def use_fp16(self):
        """True when any mixed precision (fp16 or bf16) is enabled."""
        return self.mixed_precision != "no"

    @staticmethod
    def _reset_state():
        "Resets `_shared_state`, is used internally and should not be called"
        AcceleratorState._shared_state = {}

    def _check_initialized(self, mixed_precision=None, cpu=None):
        "Checks if a modification is trying to be made and the `AcceleratorState` has already been initialized"
        if getattr(self, "initialized", False):
            # BUG FIX: the message previously said `Accelerate()`; the class
            # users instantiate is `Accelerator()`.
            err = "AcceleratorState has already been initialized and cannot be changed, restart your runtime completely and pass `{flag}` to `Accelerator()`."
            if cpu and self.device.type != "cpu":
                raise ValueError(err.format(flag="cpu=True"))
            if mixed_precision is not None and mixed_precision != self.mixed_precision:
                raise ValueError(err.format(flag=f"mixed_precision='{mixed_precision}'"))
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
class GradientState:
    """
    Borg-style shared-state class: all instances of `GradientState` share the same
    state, which is initialized on the first instantiation.

    This specific state revolves around whether gradients should be synced and if we
    have reached the end of a prepared dataloader.

    Attributes:

        - **sync_gradients** (`bool`) -- Whether the gradients should be synced
        - **end_of_dataloader** (`bool`) -- Whether we have reached the end the current dataloader
    """

    # Dict shared by every instance (Borg pattern).
    _shared_state = {}

    def __init__(self):
        # Alias this instance's attribute dict to the shared one so every
        # instance observes the same values.
        self.__dict__ = self._shared_state
        if getattr(self, "initialized", False):
            return
        self.sync_gradients = True
        self.end_of_dataloader = False
        self.initialized = True

    def __repr__(self):
        return (
            f"Sync Gradients: {self.sync_gradients}\n"
            f"At end of current dataloader: {self.end_of_dataloader}\n"
        )

    def _set_sync_gradients(self, sync_gradients):
        "Private function that sets whether gradients should be synchronized. Users should not have to call this."
        self.sync_gradients = sync_gradients

    def _set_end_of_dataloader(self, end_of_dataloader):
        "Private function that sets whether the end of the current dataloader has been reached. Users should not have to call this."
        self.end_of_dataloader = end_of_dataloader
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/tracking.py
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# Expectation:
|
| 16 |
+
# Provide a project dir name, then each type of logger gets stored in project/{`logging_dir`}
|
| 17 |
+
|
| 18 |
+
import os
|
| 19 |
+
from abc import ABCMeta, abstractmethod, abstractproperty
|
| 20 |
+
from typing import List, Optional, Union
|
| 21 |
+
|
| 22 |
+
from .logging import get_logger
|
| 23 |
+
from .utils import LoggerType, is_comet_ml_available, is_tensorboard_available, is_wandb_available
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Registry of tracker types whose backing packages can be imported in the
# current environment; populated by the availability checks just below.
_available_trackers = []

if is_tensorboard_available():
    from torch.utils import tensorboard

    _available_trackers.append(LoggerType.TENSORBOARD)

if is_wandb_available():
    import wandb

    _available_trackers.append(LoggerType.WANDB)

if is_comet_ml_available():
    from comet_ml import Experiment

    _available_trackers.append(LoggerType.COMETML)


# Module-level logger shared by every tracker class in this file.
logger = get_logger(__name__)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def get_available_trackers():
    """Return the module-level list of tracker types importable in this environment."""
    return _available_trackers
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class GeneralTracker(object, metaclass=ABCMeta):
    """
    A base Tracker class to be used for all logging integration implementations.

    Subclasses must provide `requires_logging_directory`, `store_init_configuration`
    and `log`; `finish` is optional.
    """

    # NOTE: `abc.abstractproperty` (deprecated since Python 3.3) replaced by
    # stacking `@property` on `@abstractmethod`. Subclasses that override with a
    # plain class attribute (as the trackers in this file do) still satisfy it.
    @property
    @abstractmethod
    def requires_logging_directory(self):
        """
        Whether the logger requires a directory to store their logs. Should either return `True` or `False`.
        """
        pass

    @abstractmethod
    def store_init_configuration(self, values: dict):
        """
        Logs `values` as hyperparameters for the run. Implementations should use the experiment configuration
        functionality of a tracking API.

        Args:
            values (Dictionary `str` to `bool`, `str`, `float` or `int`):
                Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`,
                `str`, `float`, `int`, or `None`.
        """
        pass

    @abstractmethod
    def log(self, values: dict, step: Optional[int]):
        """
        Logs `values` to the current run. Base `log` implementations of a tracking API should go in here, along with
        special behavior for the `step` parameter.

        Args:
            values (Dictionary `str` to `str`, `float`, or `int`):
                Values to be logged as key-value pairs. The values need to have type `str`, `float`, or `int`.
            step (`int`, *optional*):
                The run step. If included, the log will be affiliated with this step.
        """
        pass

    def finish(self):
        """
        Should run any finalizing functions within the tracking API. If the API should not have one, just don't
        overwrite that method.
        """
        pass
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class TensorBoardTracker(GeneralTracker):
    """
    Experiment tracker backed by `tensorboard`. Should be initialized at the start of
    your script.

    Args:
        run_name (`str`):
            The name of the experiment run
        logging_dir (`str`, `os.PathLike`):
            Location for TensorBoard logs to be stored.
    """

    # TensorBoard writes event files to disk, so a directory is mandatory.
    requires_logging_directory = True

    def __init__(self, run_name: str, logging_dir: Optional[Union[str, os.PathLike]]):
        self.run_name = run_name
        self.logging_dir = os.path.join(logging_dir, run_name)
        self.writer = tensorboard.SummaryWriter(self.logging_dir)
        logger.info(f"Initialized TensorBoard project {self.run_name} logging to {self.logging_dir}")
        logger.info(
            "Make sure to log any initial configurations with `self.store_init_configuration` before training!"
        )

    def store_init_configuration(self, values: dict):
        """
        Record `values` as the run's hyperparameters. Call once, at the start of the
        experiment.

        Args:
            values (Dictionary `str` to `bool`, `str`, `float` or `int`):
                Initial hyperparameters as key-value pairs; values must be of type
                `bool`, `str`, `float`, `int`, or `None`.
        """
        self.writer.add_hparams(values, metric_dict={})
        self.writer.flush()
        logger.info("Stored initial configuration hyperparameters to TensorBoard")

    def log(self, values: dict, step: Optional[int] = None):
        """
        Write each entry of `values` to the current run, dispatching on the value's
        type (scalar, text, or nested scalar dict).

        Args:
            values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`):
                Key-value pairs to log.
            step (`int`, *optional*):
                The run step to affiliate the log entries with.
        """
        for name, payload in values.items():
            if isinstance(payload, (int, float)):
                self.writer.add_scalar(name, payload, global_step=step)
            elif isinstance(payload, str):
                self.writer.add_text(name, payload, global_step=step)
            elif isinstance(payload, dict):
                self.writer.add_scalars(name, payload, global_step=step)
        self.writer.flush()
        logger.info("Successfully logged to TensorBoard")

    def finish(self):
        """Close the underlying `SummaryWriter`."""
        self.writer.close()
        logger.info("TensorBoard writer closed")
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
class WandBTracker(GeneralTracker):
    """
    Experiment tracker backed by `wandb`. Should be initialized at the start of your
    script.

    Args:
        run_name (`str`):
            The name of the experiment run.
    """

    # wandb manages its own storage; no local logging directory is needed.
    requires_logging_directory = False

    def __init__(self, run_name: str):
        self.run_name = run_name
        self.run = wandb.init(project=self.run_name)
        logger.info(f"Initialized WandB project {self.run_name}")
        logger.info(
            "Make sure to log any initial configurations with `self.store_init_configuration` before training!"
        )

    def store_init_configuration(self, values: dict):
        """
        Record `values` as the run's hyperparameters. Call once, at the start of the
        experiment.

        Args:
            values (Dictionary `str` to `bool`, `str`, `float` or `int`):
                Initial hyperparameters as key-value pairs; values must be of type
                `bool`, `str`, `float`, `int`, or `None`.
        """
        wandb.config.update(values)
        logger.info("Stored initial configuration hyperparameters to WandB")

    def log(self, values: dict, step: Optional[int] = None):
        """
        Forward `values` to the active wandb run.

        Args:
            values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`):
                Key-value pairs to log.
            step (`int`, *optional*):
                The run step to affiliate the log entries with.
        """
        self.run.log(values, step=step)
        logger.info("Successfully logged to WandB")

    def finish(self):
        """Finish the active `wandb` run."""
        self.run.finish()
        logger.info("WandB run closed")
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
class CometMLTracker(GeneralTracker):
    """
    Experiment tracker backed by `comet_ml`. Should be initialized at the start of
    your script.

    API keys must be stored in a Comet config file.

    Args:
        run_name (`str`):
            The name of the experiment run.
    """

    # Comet streams to its service; no local logging directory is needed.
    requires_logging_directory = False

    def __init__(self, run_name: str):
        self.run_name = run_name
        self.writer = Experiment(project_name=run_name)
        logger.info(f"Initialized CometML project {self.run_name}")
        logger.info(
            "Make sure to log any initial configurations with `self.store_init_configuration` before training!"
        )

    def store_init_configuration(self, values: dict):
        """
        Record `values` as the run's hyperparameters. Call once, at the start of the
        experiment.

        Args:
            values (Dictionary `str` to `bool`, `str`, `float` or `int`):
                Initial hyperparameters as key-value pairs; values must be of type
                `bool`, `str`, `float`, `int`, or `None`.
        """
        self.writer.log_parameters(values)
        logger.info("Stored initial configuration hyperparameters to CometML")

    def log(self, values: dict, step: Optional[int] = None):
        """
        Write each entry of `values` to the current run, dispatching on the value's
        type (metric, text, or nested metric dict).

        Args:
            values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`):
                Key-value pairs to log.
            step (`int`, *optional*):
                The run step to affiliate the log entries with.
        """
        if step is not None:
            self.writer.set_step(step)
        for name, payload in values.items():
            if isinstance(payload, (int, float)):
                self.writer.log_metric(name, payload, step=step)
            elif isinstance(payload, str):
                self.writer.log_other(name, payload)
            elif isinstance(payload, dict):
                self.writer.log_metrics(payload, step=step)
        logger.info("Successfully logged to CometML")

    def finish(self):
        """End the Comet experiment."""
        self.writer.end()
        logger.info("CometML run closed")
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
# Maps each supported logger-type string to the tracker class that implements it.
LOGGER_TYPE_TO_CLASS = {"tensorboard": TensorBoardTracker, "wandb": WandBTracker, "comet_ml": CometMLTracker}
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def filter_trackers(
    log_with: List[Union[str, LoggerType, GeneralTracker]], logging_dir: Union[str, os.PathLike] = None
):
    """
    Takes in a list of potential tracker types and checks that:
        - The tracker wanted is available in that environment
        - Filters out repeats of tracker types
        - If `all` is in `log_with`, will return all trackers in the environment
        - If a tracker requires a `logging_dir`, ensures that `logging_dir` is not `None`

    Args:
        log_with (list of `str`, [`~utils.LoggerType`] or [`~tracking.GeneralTracker`], *optional*):
            A list of loggers to be setup for experiment tracking. Should be one or several of:

            - `"all"`
            - `"tensorboard"`
            - `"wandb"`
            - `"comet_ml"`
            If `"all`" is selected, will pick up all available trackers in the environment and initialize them. Can
            also accept implementations of `GeneralTracker` for custom trackers, and can be combined with `"all"`.
        logging_dir (`str`, `os.PathLike`, *optional*):
            A path to a directory for storing logs of locally-compatible loggers.

    Returns:
        A de-duplicated list of `LoggerType` members and/or `GeneralTracker` instances.
    """
    loggers = []
    if log_with is not None:
        # A single tracker may be passed bare; normalize to a list.
        if not isinstance(log_with, (list, tuple)):
            log_with = [log_with]
        logger.debug(f"{log_with}")
        if "all" in log_with or LoggerType.ALL in log_with:
            # Keep any custom GeneralTracker instances and add every tracker
            # whose backing package is importable.
            loggers = [o for o in log_with if issubclass(type(o), GeneralTracker)] + get_available_trackers()
        else:
            for log_type in log_with:
                # Reject anything that is neither a known LoggerType value nor
                # a custom tracker implementation.
                if log_type not in LoggerType and not issubclass(type(log_type), GeneralTracker):
                    raise ValueError(f"Unsupported logging capability: {log_type}. Choose between {LoggerType.list()}")
                if issubclass(type(log_type), GeneralTracker):
                    # Custom trackers are taken as-is, no availability check.
                    loggers.append(log_type)
                else:
                    # Coerce the string/enum to a LoggerType for de-duplication.
                    log_type = LoggerType(log_type)
                    if log_type not in loggers:
                        if log_type in get_available_trackers():
                            tracker_init = LOGGER_TYPE_TO_CLASS[str(log_type)]
                            # Checked on the class attribute, not an instance.
                            if getattr(tracker_init, "requires_logging_directory"):
                                if logging_dir is None:
                                    raise ValueError(
                                        f"Logging with `{str(log_type)}` requires a `logging_dir` to be passed in."
                                    )
                            loggers.append(log_type)
                        else:
                            # Requested but not installed: warn and skip.
                            logger.info(f"Tried adding logger {log_type}, but package is unavailable in the system.")

    return loggers
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# This library is free software; you can redistribute it and/or
|
| 3 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 4 |
+
# License as published by the Free Software Foundation; either
|
| 5 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 6 |
+
#
|
| 7 |
+
# This library is distributed in the hope that it will be useful,
|
| 8 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 9 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 10 |
+
# Lesser General Public License for more details.
|
| 11 |
+
#
|
| 12 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 13 |
+
# License along with this library; if not, write to the Free Software
|
| 14 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 15 |
+
# 02110-1301 USA
|
| 16 |
+
######################### END LICENSE BLOCK #########################
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
from .compat import PY2, PY3
|
| 20 |
+
from .universaldetector import UniversalDetector
|
| 21 |
+
from .version import __version__, VERSION
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def detect(byte_str):
    """
    Detect the encoding of the given byte string.

    :param byte_str: The byte sequence to examine.
    :type byte_str: ``bytes`` or ``bytearray``
    """
    # Reject anything that is not a byte sequence up front.
    if not isinstance(byte_str, (bytes, bytearray)):
        raise TypeError('Expected object of type bytes or bytearray, got: '
                        '{0}'.format(type(byte_str)))
    # The detector works on bytearray; convert plain bytes once.
    if not isinstance(byte_str, bytearray):
        byte_str = bytearray(byte_str)
    detector = UniversalDetector()
    detector.feed(byte_str)
    return detector.close()
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/codingstatemachine.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is mozilla.org code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
import logging
|
| 29 |
+
|
| 30 |
+
from .enums import MachineState
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class CodingStateMachine(object):
    """
    A state machine to verify a byte sequence for a particular encoding. For
    each byte the detector receives, it will feed that byte to every active
    state machine available, one byte at a time. The state machine changes its
    state based on its previous state and the byte it receives. There are 3
    states in a state machine that are of interest to an auto-detector:

    START state: This is the state to start with, or a legal byte sequence
                 (i.e. a valid code point) for character has been identified.

    ME state:  This indicates that the state machine identified a byte sequence
               that is specific to the charset it is designed for and that
               there is no other possible encoding which can contain this byte
               sequence. This will to lead to an immediate positive answer for
               the detector.

    ERROR state: This indicates the state machine identified an illegal byte
                 sequence for that encoding. This will lead to an immediate
                 negative answer for this encoding. Detector will exclude this
                 encoding from consideration from here on.
    """

    def __init__(self, sm):
        # `sm` is the per-encoding model dict (class_table, state_table, etc.).
        self._model = sm
        self._curr_byte_pos = 0
        self._curr_char_len = 0
        self._curr_state = None
        self.logger = logging.getLogger(__name__)
        self.reset()

    def reset(self):
        """Return the machine to its START state."""
        self._curr_state = MachineState.START

    def next_state(self, c):
        """Feed one byte and return the resulting machine state."""
        # Classify the byte; at the start of a sequence also record how many
        # bytes the character is expected to span.
        byte_cls = self._model['class_table'][c]
        if self._curr_state == MachineState.START:
            self._curr_byte_pos = 0
            self._curr_char_len = self._model['char_len_table'][byte_cls]
        # Transition: the state table is indexed by (state, class) flattened
        # with `class_factor` as the row width.
        transition_index = self._curr_state * self._model['class_factor'] + byte_cls
        self._curr_state = self._model['state_table'][transition_index]
        self._curr_byte_pos += 1
        return self._curr_state

    def get_current_charlen(self):
        """Length (in bytes) of the character currently being decoded."""
        return self._curr_char_len

    def get_coding_state_machine(self):
        """Name of the encoding this machine models."""
        return self._model['name']

    @property
    def language(self):
        return self._model['language']
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/cp949prober.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is mozilla.org code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
from .chardistribution import EUCKRDistributionAnalysis
|
| 29 |
+
from .codingstatemachine import CodingStateMachine
|
| 30 |
+
from .mbcharsetprober import MultiByteCharSetProber
|
| 31 |
+
from .mbcssm import CP949_SM_MODEL
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober for CP949 (Unified Hangul Code) Korean text."""

    def __init__(self):
        super(CP949Prober, self).__init__()
        self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
        # CP949 is a superset of EUC-KR, so the same character-frequency
        # distribution analysis applies unchanged.
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        self.reset()

    @property
    def language(self):
        return "Korean"

    @property
    def charset_name(self):
        return "CP949"
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/enums.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
All of the Enums that are used throughout the chardet package.
|
| 3 |
+
|
| 4 |
+
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class InputState(object):
    """
    Enum of the input states a universal detector moves through while it
    classifies the bytes it has seen so far.
    """
    PURE_ASCII = 0   # only 7-bit ASCII seen so far
    ESC_ASCII = 1    # ASCII plus escape sequences (ISO-2022 style)
    HIGH_BYTE = 2    # at least one byte >= 0x80 seen
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class LanguageFilter(object):
    """
    Bitmask enum of the language filters that can be applied to a
    ``UniversalDetector``.
    """
    CHINESE_SIMPLIFIED = 0x01
    CHINESE_TRADITIONAL = 0x02
    JAPANESE = 0x04
    KOREAN = 0x08
    NON_CJK = 0x10
    ALL = 0x1F
    # Composite masks built from the individual flags above.
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class ProbingState(object):
    """
    Enum of the states a charset prober can report.
    """
    DETECTING = 0  # still gathering evidence
    FOUND_IT = 1   # positively identified its charset
    NOT_ME = 2     # ruled itself out
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class MachineState(object):
    """
    Enum of the states a coding state machine can be in.
    """
    START = 0   # initial state / a full legal character just completed
    ERROR = 1   # illegal byte sequence for this encoding
    ITS_ME = 2  # byte sequence unique to this encoding
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class SequenceLikelihood(object):
    """
    Enum grading how likely one character is to follow the previous one.
    """
    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        # Categories are numbered 0..POSITIVE, so the count is POSITIVE + 1.
        return cls.POSITIVE + 1
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class CharacterCategory(object):
    """
    Enum of the categories that ``SingleByteCharsetProber`` language models
    assign to characters.

    Any value below CONTROL is treated as a letter.
    """
    UNDEFINED = 255
    LINE_BREAK = 254
    SYMBOL = 253
    DIGIT = 252
    CONTROL = 251
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/eucjpprober.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is mozilla.org code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
from .enums import ProbingState, MachineState
|
| 29 |
+
from .mbcharsetprober import MultiByteCharSetProber
|
| 30 |
+
from .codingstatemachine import CodingStateMachine
|
| 31 |
+
from .chardistribution import EUCJPDistributionAnalysis
|
| 32 |
+
from .jpcntx import EUCJPContextAnalysis
|
| 33 |
+
from .mbcssm import EUCJP_SM_MODEL
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class EUCJPProber(MultiByteCharSetProber):
    """Multi-byte prober for Japanese text encoded as EUC-JP."""

    def __init__(self):
        super(EUCJPProber, self).__init__()
        self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
        self.distribution_analyzer = EUCJPDistributionAnalysis()
        self.context_analyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        """Reset both the base prober state and the context analyzer."""
        super(EUCJPProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        return "EUC-JP"

    @property
    def language(self):
        return "Japanese"

    def feed(self, byte_str):
        """Feed a buffer of bytes and return the updated probing state."""
        for i, byte in enumerate(byte_str):
            # byte_str is a byte sequence, so each element is an int.
            coding_state = self.coding_sm.next_state(byte)
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            if coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            if coding_state == MachineState.START:
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # First byte of this buffer: pair it with the byte
                    # carried over from the previous feed() call.
                    self._last_char[1] = byte
                    self.context_analyzer.feed(self._last_char, char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    pair = byte_str[i - 1:i + 1]
                    self.context_analyzer.feed(pair, char_len)
                    self.distribution_analyzer.feed(pair, char_len)

        # Remember the last byte so the next buffer can pair with it.
        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            # Shortcut: declare a match once the context analyzer has seen
            # enough data and confidence is high enough.
            if (self.context_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        return max(self.context_analyzer.get_confidence(),
                   self.distribution_analyzer.get_confidence())
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/euctwprober.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is mozilla.org code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
from .mbcharsetprober import MultiByteCharSetProber
|
| 29 |
+
from .codingstatemachine import CodingStateMachine
|
| 30 |
+
from .chardistribution import EUCTWDistributionAnalysis
|
| 31 |
+
from .mbcssm import EUCTW_SM_MODEL
|
| 32 |
+
|
| 33 |
+
class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte prober for the EUC-TW encoding (Taiwan)."""

    def __init__(self):
        super(EUCTWProber, self).__init__()
        self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
        self.distribution_analyzer = EUCTWDistributionAnalysis()
        self.reset()

    @property
    def language(self):
        return "Taiwan"

    @property
    def charset_name(self):
        return "EUC-TW"
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/gb2312prober.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is mozilla.org code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
from .mbcharsetprober import MultiByteCharSetProber
|
| 29 |
+
from .codingstatemachine import CodingStateMachine
|
| 30 |
+
from .chardistribution import GB2312DistributionAnalysis
|
| 31 |
+
from .mbcssm import GB2312_SM_MODEL
|
| 32 |
+
|
| 33 |
+
class GB2312Prober(MultiByteCharSetProber):
    """Multi-byte prober for the GB2312 encoding of Chinese."""

    def __init__(self):
        super(GB2312Prober, self).__init__()
        self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
        self.distribution_analyzer = GB2312DistributionAnalysis()
        self.reset()

    @property
    def language(self):
        return "Chinese"

    @property
    def charset_name(self):
        return "GB2312"
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/hebrewprober.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is Mozilla Universal charset detector code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Shy Shalom
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 2005
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
from .charsetprober import CharSetProber
|
| 29 |
+
from .enums import ProbingState
|
| 30 |
+
|
| 31 |
+
# This prober doesn't actually recognize a language or a charset.
|
| 32 |
+
# It is a helper prober for the use of the Hebrew model probers
|
| 33 |
+
|
| 34 |
+
### General ideas of the Hebrew charset recognition ###
|
| 35 |
+
#
|
| 36 |
+
# Four main charsets exist in Hebrew:
|
| 37 |
+
# "ISO-8859-8" - Visual Hebrew
|
| 38 |
+
# "windows-1255" - Logical Hebrew
|
| 39 |
+
# "ISO-8859-8-I" - Logical Hebrew
|
| 40 |
+
# "x-mac-hebrew" - ?? Logical Hebrew ??
|
| 41 |
+
#
|
| 42 |
+
# Both "ISO" charsets use a completely identical set of code points, whereas
|
| 43 |
+
# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
|
| 44 |
+
# these code points. windows-1255 defines additional characters in the range
|
| 45 |
+
# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
|
| 46 |
+
# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
|
| 47 |
+
# x-mac-hebrew defines similar additional code points but with a different
|
| 48 |
+
# mapping.
|
| 49 |
+
#
|
| 50 |
+
# As far as an average Hebrew text with no diacritics is concerned, all four
|
| 51 |
+
# charsets are identical with respect to code points. Meaning that for the
|
| 52 |
+
# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
|
| 53 |
+
# (including final letters).
|
| 54 |
+
#
|
| 55 |
+
# The dominant difference between these charsets is their directionality.
|
| 56 |
+
# "Visual" directionality means that the text is ordered as if the renderer is
|
| 57 |
+
# not aware of a BIDI rendering algorithm. The renderer sees the text and
|
| 58 |
+
# draws it from left to right. The text itself when ordered naturally is read
|
| 59 |
+
# backwards. A buffer of Visual Hebrew generally looks like so:
|
| 60 |
+
# "[last word of first line spelled backwards] [whole line ordered backwards
|
| 61 |
+
# and spelled backwards] [first word of first line spelled backwards]
|
| 62 |
+
# [end of line] [last word of second line] ... etc' "
|
| 63 |
+
# adding punctuation marks, numbers and English text to visual text is
|
| 64 |
+
# naturally also "visual" and from left to right.
|
| 65 |
+
#
|
| 66 |
+
# "Logical" directionality means the text is ordered "naturally" according to
|
| 67 |
+
# the order it is read. It is the responsibility of the renderer to display
|
| 68 |
+
# the text from right to left. A BIDI algorithm is used to place general
|
| 69 |
+
# punctuation marks, numbers and English text in the text.
|
| 70 |
+
#
|
| 71 |
+
# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
|
| 72 |
+
# what little evidence I could find, it seems that its general directionality
|
| 73 |
+
# is Logical.
|
| 74 |
+
#
|
| 75 |
+
# To sum up all of the above, the Hebrew probing mechanism knows about two
|
| 76 |
+
# charsets:
|
| 77 |
+
# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
|
| 78 |
+
# backwards while line order is natural. For charset recognition purposes
|
| 79 |
+
# the line order is unimportant (In fact, for this implementation, even
|
| 80 |
+
# word order is unimportant).
|
| 81 |
+
# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
|
| 82 |
+
#
|
| 83 |
+
# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
|
| 84 |
+
# specifically identified.
|
| 85 |
+
# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
|
| 86 |
+
# that contain special punctuation marks or diacritics is displayed with
|
| 87 |
+
# some unconverted characters showing as question marks. This problem might
|
| 88 |
+
# be corrected using another model prober for x-mac-hebrew. Due to the fact
|
| 89 |
+
# that x-mac-hebrew texts are so rare, writing another model prober isn't
|
| 90 |
+
# worth the effort and performance hit.
|
| 91 |
+
#
|
| 92 |
+
#### The Prober ####
|
| 93 |
+
#
|
| 94 |
+
# The prober is divided between two SBCharSetProbers and a HebrewProber,
|
| 95 |
+
# all of which are managed, created, fed data, inquired and deleted by the
|
| 96 |
+
# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
|
| 97 |
+
# fact some kind of Hebrew, Logical or Visual. The final decision about which
|
| 98 |
+
# one is it is made by the HebrewProber by combining final-letter scores
|
| 99 |
+
# with the scores of the two SBCharSetProbers to produce a final answer.
|
| 100 |
+
#
|
| 101 |
+
# The SBCSGroupProber is responsible for stripping the original text of HTML
|
| 102 |
+
# tags, English characters, numbers, low-ASCII punctuation characters, spaces
|
| 103 |
+
# and new lines. It reduces any sequence of such characters to a single space.
|
| 104 |
+
# The buffer fed to each prober in the SBCS group prober is pure text in
|
| 105 |
+
# high-ASCII.
|
| 106 |
+
# The two SBCharSetProbers (model probers) share the same language model:
|
| 107 |
+
# Win1255Model.
|
| 108 |
+
# The first SBCharSetProber uses the model normally as any other
|
| 109 |
+
# SBCharSetProber does, to recognize windows-1255, upon which this model was
|
| 110 |
+
# built. The second SBCharSetProber is told to make the pair-of-letter
|
| 111 |
+
# lookup in the language model backwards. This in practice exactly simulates
|
| 112 |
+
# a visual Hebrew model using the windows-1255 logical Hebrew model.
|
| 113 |
+
#
|
| 114 |
+
# The HebrewProber is not using any language model. All it does is look for
|
| 115 |
+
# final-letter evidence suggesting the text is either logical Hebrew or visual
|
| 116 |
+
# Hebrew. Disjointed from the model probers, the results of the HebrewProber
|
| 117 |
+
# alone are meaningless. HebrewProber always returns 0.00 as confidence
|
| 118 |
+
# since it never identifies a charset by itself. Instead, the pointer to the
|
| 119 |
+
# HebrewProber is passed to the model probers as a helper "Name Prober".
|
| 120 |
+
# When the Group prober receives a positive identification from any prober,
|
| 121 |
+
# it asks for the name of the charset identified. If the prober queried is a
|
| 122 |
+
# Hebrew model prober, the model prober forwards the call to the
|
| 123 |
+
# HebrewProber to make the final decision. In the HebrewProber, the
|
| 124 |
+
# decision is made according to the final-letters scores maintained and Both
|
| 125 |
+
# model probers scores. The answer is returned in the form of the name of the
|
| 126 |
+
# charset identified, either "windows-1255" or "ISO-8859-8".
|
| 127 |
+
|
| 128 |
+
class HebrewProber(CharSetProber):
    """
    Helper prober that decides between Logical ("windows-1255") and Visual
    ("ISO-8859-8") Hebrew by accumulating final-letter evidence; it never
    identifies a charset on its own (see the module-level commentary).
    """
    # windows-1255 / ISO-8859-8 code points of interest
    FINAL_KAF = 0xea
    NORMAL_KAF = 0xeb
    FINAL_MEM = 0xed
    NORMAL_MEM = 0xee
    FINAL_NUN = 0xef
    NORMAL_NUN = 0xf0
    FINAL_PE = 0xf3
    NORMAL_PE = 0xf4
    FINAL_TSADI = 0xf5
    NORMAL_TSADI = 0xf6

    # Minimum Visual vs Logical final letter score difference.
    # If the difference is below this, don't rely solely on the final letter score
    # distance.
    MIN_FINAL_CHAR_DISTANCE = 5

    # Minimum Visual vs Logical model score difference.
    # If the difference is below this, don't rely at all on the model score
    # distance.
    MIN_MODEL_DISTANCE = 0.01

    VISUAL_HEBREW_NAME = "ISO-8859-8"
    LOGICAL_HEBREW_NAME = "windows-1255"

    def __init__(self):
        """Initialize score counters and prober references, then reset."""
        super(HebrewProber, self).__init__()
        self._final_char_logical_score = None
        self._final_char_visual_score = None
        self._prev = None
        self._before_prev = None
        self._logical_prober = None
        self._visual_prober = None
        self.reset()

    def reset(self):
        """Zero the final-letter scores and reset the character history."""
        self._final_char_logical_score = 0
        self._final_char_visual_score = 0
        # The two last characters seen in the previous buffer,
        # mPrev and mBeforePrev are initialized to space in order to simulate
        # a word delimiter at the beginning of the data
        self._prev = ' '
        self._before_prev = ' '
        # These probers are owned by the group prober.

    def set_model_probers(self, logicalProber, visualProber):
        """Store the two SBCS model probers used for the fallback decision."""
        self._logical_prober = logicalProber
        self._visual_prober = visualProber

    def is_final(self, c):
        """Return True if ``c`` is one of the five Hebrew final letters."""
        return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
                     self.FINAL_PE, self.FINAL_TSADI]

    def is_non_final(self, c):
        """Return True if ``c`` is a non-final form that rarely ends a word."""
        # The normal Tsadi is not a good Non-Final letter due to words like
        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
        # apostrophe is converted to a space in FilterWithoutEnglishLetters
        # causing the Non-Final tsadi to appear at an end of a word even
        # though this is not the case in the original text.
        # The letters Pe and Kaf rarely display a related behavior of not being
        # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
        # for example legally end with a Non-Final Pe or Kaf. However, the
        # benefit of these letters as Non-Final letters outweighs the damage
        # since these words are quite rare.
        return c in [self.NORMAL_KAF, self.NORMAL_MEM,
                     self.NORMAL_NUN, self.NORMAL_PE]

    def feed(self, byte_str):
        """Accumulate logical/visual final-letter evidence from ``byte_str``."""
        # Final letter analysis for logical-visual decision.
        # Look for evidence that the received buffer is either logical Hebrew
        # or visual Hebrew.
        # The following cases are checked:
        # 1) A word longer than 1 letter, ending with a final letter. This is
        #    an indication that the text is laid out "naturally" since the
        #    final letter really appears at the end. +1 for logical score.
        # 2) A word longer than 1 letter, ending with a Non-Final letter. In
        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
        #    should not end with the Non-Final form of that letter. Exceptions
        #    to this rule are mentioned above in isNonFinal(). This is an
        #    indication that the text is laid out backwards. +1 for visual
        #    score
        # 3) A word longer than 1 letter, starting with a final letter. Final
        #    letters should not appear at the beginning of a word. This is an
        #    indication that the text is laid out backwards. +1 for visual
        #    score.
        #
        # The visual score and logical score are accumulated throughout the
        # text and are finally checked against each other in GetCharSetName().
        # No checking for final letters in the middle of words is done since
        # that case is not an indication for either Logical or Visual text.
        #
        # We automatically filter out all 7-bit characters (replace them with
        # spaces) so the word boundary detection works properly. [MAP]
        #
        # NOTE(review): on Python 3, iterating a bytes object yields ints, so
        # ``cur == ' '`` compares an int to a str — confirm whether
        # filter_high_byte_only yields str or bytes here, as this affects
        # whether the space-boundary branches ever fire.

        if self.state == ProbingState.NOT_ME:
            # Both model probers say it's not them. No reason to continue.
            return ProbingState.NOT_ME

        byte_str = self.filter_high_byte_only(byte_str)

        for cur in byte_str:
            if cur == ' ':
                # We stand on a space - a word just ended
                if self._before_prev != ' ':
                    # next-to-last char was not a space so self._prev is not a
                    # 1 letter word
                    if self.is_final(self._prev):
                        # case (1) [-2:not space][-1:final letter][cur:space]
                        self._final_char_logical_score += 1
                    elif self.is_non_final(self._prev):
                        # case (2) [-2:not space][-1:Non-Final letter][
                        # cur:space]
                        self._final_char_visual_score += 1
            else:
                # Not standing on a space
                if ((self._before_prev == ' ') and
                        (self.is_final(self._prev)) and (cur != ' ')):
                    # case (3) [-2:space][-1:final letter][cur:not space]
                    self._final_char_visual_score += 1
            self._before_prev = self._prev
            self._prev = cur

        # Forever detecting, till the end or until both model probers return
        # ProbingState.NOT_ME (handled above)
        return ProbingState.DETECTING

    @property
    def charset_name(self):
        """Return the charset name decided from the accumulated evidence."""
        # Make the decision: is it Logical or Visual?
        # If the final letter score distance is dominant enough, rely on it.
        finalsub = self._final_char_logical_score - self._final_char_visual_score
        if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
            return self.LOGICAL_HEBREW_NAME
        if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
            return self.VISUAL_HEBREW_NAME

        # It's not dominant enough, try to rely on the model scores instead.
        modelsub = (self._logical_prober.get_confidence()
                    - self._visual_prober.get_confidence())
        if modelsub > self.MIN_MODEL_DISTANCE:
            return self.LOGICAL_HEBREW_NAME
        if modelsub < -self.MIN_MODEL_DISTANCE:
            return self.VISUAL_HEBREW_NAME

        # Still no good, back to final letter distance, maybe it'll save the
        # day.
        if finalsub < 0.0:
            return self.VISUAL_HEBREW_NAME

        # (finalsub > 0 - Logical) or (don't know what to do) default to
        # Logical.
        return self.LOGICAL_HEBREW_NAME

    @property
    def language(self):
        return 'Hebrew'

    @property
    def state(self):
        """Remain DETECTING as long as either model prober is still active."""
        # Remain active as long as any of the model probers are active.
        if (self._logical_prober.state == ProbingState.NOT_ME) and \
           (self._visual_prober.state == ProbingState.NOT_ME):
            return ProbingState.NOT_ME
        return ProbingState.DETECTING
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/jpcntx.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is Mozilla Communicator client code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
| 30 |
+
jp2CharContext = (
|
| 31 |
+
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
|
| 32 |
+
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
|
| 33 |
+
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
|
| 34 |
+
(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
|
| 35 |
+
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
| 36 |
+
(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
|
| 37 |
+
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
| 38 |
+
(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
|
| 39 |
+
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
| 40 |
+
(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
|
| 41 |
+
(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
|
| 42 |
+
(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
|
| 43 |
+
(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
|
| 44 |
+
(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
|
| 45 |
+
(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
|
| 46 |
+
(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
|
| 47 |
+
(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
|
| 48 |
+
(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
|
| 49 |
+
(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
|
| 50 |
+
(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
|
| 51 |
+
(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
|
| 52 |
+
(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
|
| 53 |
+
(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
|
| 54 |
+
(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
|
| 55 |
+
(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
|
| 56 |
+
(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
|
| 57 |
+
(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
|
| 58 |
+
(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
|
| 59 |
+
(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
|
| 60 |
+
(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
|
| 61 |
+
(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
|
| 62 |
+
(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
|
| 63 |
+
(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
|
| 64 |
+
(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
|
| 65 |
+
(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
|
| 66 |
+
(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
|
| 67 |
+
(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
|
| 68 |
+
(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
|
| 69 |
+
(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
|
| 70 |
+
(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
|
| 71 |
+
(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
|
| 72 |
+
(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
|
| 73 |
+
(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
|
| 74 |
+
(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
|
| 75 |
+
(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
|
| 76 |
+
(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
|
| 77 |
+
(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
|
| 78 |
+
(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
|
| 79 |
+
(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
|
| 80 |
+
(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
|
| 81 |
+
(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
|
| 82 |
+
(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
|
| 83 |
+
(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
|
| 84 |
+
(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
|
| 85 |
+
(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
|
| 86 |
+
(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
|
| 87 |
+
(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
|
| 88 |
+
(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
|
| 89 |
+
(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
|
| 90 |
+
(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
|
| 91 |
+
(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
|
| 92 |
+
(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
|
| 93 |
+
(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
|
| 94 |
+
(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
|
| 95 |
+
(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
|
| 96 |
+
(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
|
| 97 |
+
(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
|
| 98 |
+
(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
|
| 99 |
+
(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
|
| 100 |
+
(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
|
| 101 |
+
(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
|
| 102 |
+
(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
|
| 103 |
+
(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
|
| 104 |
+
(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
|
| 105 |
+
(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
|
| 106 |
+
(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
|
| 107 |
+
(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
|
| 108 |
+
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
| 109 |
+
(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
|
| 110 |
+
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
|
| 111 |
+
(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
|
| 112 |
+
(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
|
| 113 |
+
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
class JapaneseContextAnalysis(object):
|
| 117 |
+
NUM_OF_CATEGORY = 6
|
| 118 |
+
DONT_KNOW = -1
|
| 119 |
+
ENOUGH_REL_THRESHOLD = 100
|
| 120 |
+
MAX_REL_THRESHOLD = 1000
|
| 121 |
+
MINIMUM_DATA_THRESHOLD = 4
|
| 122 |
+
|
| 123 |
+
def __init__(self):
|
| 124 |
+
self._total_rel = None
|
| 125 |
+
self._rel_sample = None
|
| 126 |
+
self._need_to_skip_char_num = None
|
| 127 |
+
self._last_char_order = None
|
| 128 |
+
self._done = None
|
| 129 |
+
self.reset()
|
| 130 |
+
|
| 131 |
+
def reset(self):
|
| 132 |
+
self._total_rel = 0 # total sequence received
|
| 133 |
+
# category counters, each integer counts sequence in its category
|
| 134 |
+
self._rel_sample = [0] * self.NUM_OF_CATEGORY
|
| 135 |
+
# if last byte in current buffer is not the last byte of a character,
|
| 136 |
+
# we need to know how many bytes to skip in next buffer
|
| 137 |
+
self._need_to_skip_char_num = 0
|
| 138 |
+
self._last_char_order = -1 # The order of previous char
|
| 139 |
+
# If this flag is set to True, detection is done and conclusion has
|
| 140 |
+
# been made
|
| 141 |
+
self._done = False
|
| 142 |
+
|
| 143 |
+
def feed(self, byte_str, num_bytes):
|
| 144 |
+
if self._done:
|
| 145 |
+
return
|
| 146 |
+
|
| 147 |
+
# The buffer we got is byte oriented, and a character may span in more than one
|
| 148 |
+
# buffers. In case the last one or two byte in last buffer is not
|
| 149 |
+
# complete, we record how many byte needed to complete that character
|
| 150 |
+
# and skip these bytes here. We can choose to record those bytes as
|
| 151 |
+
# well and analyse the character once it is complete, but since a
|
| 152 |
+
# character will not make much difference, by simply skipping
|
| 153 |
+
# this character will simply our logic and improve performance.
|
| 154 |
+
i = self._need_to_skip_char_num
|
| 155 |
+
while i < num_bytes:
|
| 156 |
+
order, char_len = self.get_order(byte_str[i:i + 2])
|
| 157 |
+
i += char_len
|
| 158 |
+
if i > num_bytes:
|
| 159 |
+
self._need_to_skip_char_num = i - num_bytes
|
| 160 |
+
self._last_char_order = -1
|
| 161 |
+
else:
|
| 162 |
+
if (order != -1) and (self._last_char_order != -1):
|
| 163 |
+
self._total_rel += 1
|
| 164 |
+
if self._total_rel > self.MAX_REL_THRESHOLD:
|
| 165 |
+
self._done = True
|
| 166 |
+
break
|
| 167 |
+
self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
|
| 168 |
+
self._last_char_order = order
|
| 169 |
+
|
| 170 |
+
def got_enough_data(self):
|
| 171 |
+
return self._total_rel > self.ENOUGH_REL_THRESHOLD
|
| 172 |
+
|
| 173 |
+
def get_confidence(self):
|
| 174 |
+
# This is just one way to calculate confidence. It works well for me.
|
| 175 |
+
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
|
| 176 |
+
return (self._total_rel - self._rel_sample[0]) / self._total_rel
|
| 177 |
+
else:
|
| 178 |
+
return self.DONT_KNOW
|
| 179 |
+
|
| 180 |
+
def get_order(self, byte_str):
|
| 181 |
+
return -1, 1
|
| 182 |
+
|
| 183 |
+
class SJISContextAnalysis(JapaneseContextAnalysis):
|
| 184 |
+
def __init__(self):
|
| 185 |
+
super(SJISContextAnalysis, self).__init__()
|
| 186 |
+
self._charset_name = "SHIFT_JIS"
|
| 187 |
+
|
| 188 |
+
@property
|
| 189 |
+
def charset_name(self):
|
| 190 |
+
return self._charset_name
|
| 191 |
+
|
| 192 |
+
def get_order(self, byte_str):
|
| 193 |
+
if not byte_str:
|
| 194 |
+
return -1, 1
|
| 195 |
+
# find out current char's byte length
|
| 196 |
+
first_char = byte_str[0]
|
| 197 |
+
if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
|
| 198 |
+
char_len = 2
|
| 199 |
+
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
| 200 |
+
self._charset_name = "CP932"
|
| 201 |
+
else:
|
| 202 |
+
char_len = 1
|
| 203 |
+
|
| 204 |
+
# return its order if it is hiragana
|
| 205 |
+
if len(byte_str) > 1:
|
| 206 |
+
second_char = byte_str[1]
|
| 207 |
+
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
| 208 |
+
return second_char - 0x9F, char_len
|
| 209 |
+
|
| 210 |
+
return -1, char_len
|
| 211 |
+
|
| 212 |
+
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
| 213 |
+
def get_order(self, byte_str):
|
| 214 |
+
if not byte_str:
|
| 215 |
+
return -1, 1
|
| 216 |
+
# find out current char's byte length
|
| 217 |
+
first_char = byte_str[0]
|
| 218 |
+
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
| 219 |
+
char_len = 2
|
| 220 |
+
elif first_char == 0x8F:
|
| 221 |
+
char_len = 3
|
| 222 |
+
else:
|
| 223 |
+
char_len = 1
|
| 224 |
+
|
| 225 |
+
# return its order if it is hiragana
|
| 226 |
+
if len(byte_str) > 1:
|
| 227 |
+
second_char = byte_str[1]
|
| 228 |
+
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
| 229 |
+
return second_char - 0xA1, char_len
|
| 230 |
+
|
| 231 |
+
return -1, char_len
|
| 232 |
+
|
| 233 |
+
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langcyrillicmodel.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is Mozilla Communicator client code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
# KOI8-R language model
|
| 29 |
+
# Character Mapping Table:
|
| 30 |
+
KOI8R_char_to_order_map = (
|
| 31 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 32 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 33 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 34 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 35 |
+
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
| 36 |
+
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
| 37 |
+
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
| 38 |
+
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
| 39 |
+
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
|
| 40 |
+
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
|
| 41 |
+
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
|
| 42 |
+
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
|
| 43 |
+
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
|
| 44 |
+
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
|
| 45 |
+
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
|
| 46 |
+
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
win1251_char_to_order_map = (
|
| 50 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 51 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 52 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 53 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 54 |
+
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
| 55 |
+
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
| 56 |
+
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
| 57 |
+
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
| 58 |
+
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
| 59 |
+
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
| 60 |
+
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
| 61 |
+
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
|
| 62 |
+
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
| 63 |
+
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
| 64 |
+
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
| 65 |
+
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
latin5_char_to_order_map = (
|
| 69 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 70 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 71 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 72 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 73 |
+
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
| 74 |
+
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
| 75 |
+
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
| 76 |
+
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
| 77 |
+
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
| 78 |
+
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
| 79 |
+
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
| 80 |
+
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
| 81 |
+
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
| 82 |
+
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
| 83 |
+
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
| 84 |
+
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
macCyrillic_char_to_order_map = (
|
| 88 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 89 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 90 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 91 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 92 |
+
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
| 93 |
+
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
| 94 |
+
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
| 95 |
+
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
| 96 |
+
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
| 97 |
+
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
| 98 |
+
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
| 99 |
+
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
| 100 |
+
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
| 101 |
+
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
|
| 102 |
+
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
| 103 |
+
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
IBM855_char_to_order_map = (
|
| 107 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 108 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 109 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 110 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 111 |
+
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
| 112 |
+
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
| 113 |
+
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
| 114 |
+
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
| 115 |
+
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
|
| 116 |
+
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
|
| 117 |
+
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
|
| 118 |
+
220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
|
| 119 |
+
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
|
| 120 |
+
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
|
| 121 |
+
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
|
| 122 |
+
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
IBM866_char_to_order_map = (
|
| 126 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 127 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 128 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 129 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 130 |
+
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
| 131 |
+
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
| 132 |
+
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
| 133 |
+
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
| 134 |
+
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
| 135 |
+
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
| 136 |
+
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
| 137 |
+
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
| 138 |
+
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
| 139 |
+
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
| 140 |
+
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
| 141 |
+
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
# Model Table:
|
| 145 |
+
# total sequences: 100%
|
| 146 |
+
# first 512 sequences: 97.6601%
|
| 147 |
+
# first 1024 sequences: 2.3389%
|
| 148 |
+
# rest sequences: 0.1237%
|
| 149 |
+
# negative sequences: 0.0009%
|
| 150 |
+
RussianLangModel = (
|
| 151 |
+
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
|
| 152 |
+
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
|
| 153 |
+
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
|
| 154 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 155 |
+
3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
|
| 156 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 157 |
+
3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
|
| 158 |
+
0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 159 |
+
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
|
| 160 |
+
0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 161 |
+
3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
|
| 162 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
| 163 |
+
3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
|
| 164 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 165 |
+
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
|
| 166 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 167 |
+
3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
|
| 168 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 169 |
+
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
|
| 170 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 171 |
+
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
|
| 172 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 173 |
+
3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
|
| 174 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 175 |
+
3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
|
| 176 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 177 |
+
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
|
| 178 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 179 |
+
3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
|
| 180 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 181 |
+
2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
|
| 182 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 183 |
+
3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
|
| 184 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 185 |
+
3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
|
| 186 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 187 |
+
3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
|
| 188 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 189 |
+
3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
|
| 190 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 191 |
+
3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
|
| 192 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 193 |
+
3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
|
| 194 |
+
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 195 |
+
2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
|
| 196 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 197 |
+
3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
|
| 198 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 199 |
+
3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
|
| 200 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 201 |
+
2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
|
| 202 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 203 |
+
3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
|
| 204 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
| 205 |
+
3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
|
| 206 |
+
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 207 |
+
3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
|
| 208 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 209 |
+
2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 210 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 211 |
+
2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
|
| 212 |
+
1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
| 213 |
+
2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
|
| 214 |
+
1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
|
| 215 |
+
2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
|
| 216 |
+
1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
|
| 217 |
+
3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
|
| 218 |
+
1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
|
| 219 |
+
2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
|
| 220 |
+
1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
|
| 221 |
+
1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
|
| 222 |
+
1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
|
| 223 |
+
2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
|
| 224 |
+
1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
|
| 225 |
+
3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
|
| 226 |
+
1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
|
| 227 |
+
2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
|
| 228 |
+
1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
|
| 229 |
+
2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
|
| 230 |
+
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 231 |
+
2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
|
| 232 |
+
1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
|
| 233 |
+
1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
|
| 234 |
+
1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
|
| 235 |
+
3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
|
| 236 |
+
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
|
| 237 |
+
3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
|
| 238 |
+
1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
|
| 239 |
+
1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
|
| 240 |
+
0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
| 241 |
+
2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
|
| 242 |
+
1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
|
| 243 |
+
1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
|
| 244 |
+
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
| 245 |
+
1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
|
| 246 |
+
1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
| 247 |
+
2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
|
| 248 |
+
2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
|
| 249 |
+
1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
|
| 250 |
+
1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
|
| 251 |
+
2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 252 |
+
0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
|
| 253 |
+
1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
|
| 254 |
+
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
|
| 255 |
+
2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
|
| 256 |
+
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
|
| 257 |
+
1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
|
| 258 |
+
0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
| 259 |
+
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
|
| 260 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 261 |
+
1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
|
| 262 |
+
0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 263 |
+
1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
|
| 264 |
+
0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
| 265 |
+
1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
|
| 266 |
+
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
| 267 |
+
1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
|
| 268 |
+
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
| 269 |
+
1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
|
| 270 |
+
0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 271 |
+
2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 272 |
+
1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
|
| 273 |
+
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 274 |
+
1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
|
| 275 |
+
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 276 |
+
1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
|
| 277 |
+
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 278 |
+
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
| 279 |
+
)
|
| 280 |
+
|
| 281 |
+
Koi8rModel = {
|
| 282 |
+
'char_to_order_map': KOI8R_char_to_order_map,
|
| 283 |
+
'precedence_matrix': RussianLangModel,
|
| 284 |
+
'typical_positive_ratio': 0.976601,
|
| 285 |
+
'keep_english_letter': False,
|
| 286 |
+
'charset_name': "KOI8-R",
|
| 287 |
+
'language': 'Russian',
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
Win1251CyrillicModel = {
|
| 291 |
+
'char_to_order_map': win1251_char_to_order_map,
|
| 292 |
+
'precedence_matrix': RussianLangModel,
|
| 293 |
+
'typical_positive_ratio': 0.976601,
|
| 294 |
+
'keep_english_letter': False,
|
| 295 |
+
'charset_name': "windows-1251",
|
| 296 |
+
'language': 'Russian',
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
Latin5CyrillicModel = {
|
| 300 |
+
'char_to_order_map': latin5_char_to_order_map,
|
| 301 |
+
'precedence_matrix': RussianLangModel,
|
| 302 |
+
'typical_positive_ratio': 0.976601,
|
| 303 |
+
'keep_english_letter': False,
|
| 304 |
+
'charset_name': "ISO-8859-5",
|
| 305 |
+
'language': 'Russian',
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
MacCyrillicModel = {
|
| 309 |
+
'char_to_order_map': macCyrillic_char_to_order_map,
|
| 310 |
+
'precedence_matrix': RussianLangModel,
|
| 311 |
+
'typical_positive_ratio': 0.976601,
|
| 312 |
+
'keep_english_letter': False,
|
| 313 |
+
'charset_name': "MacCyrillic",
|
| 314 |
+
'language': 'Russian',
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
Ibm866Model = {
|
| 318 |
+
'char_to_order_map': IBM866_char_to_order_map,
|
| 319 |
+
'precedence_matrix': RussianLangModel,
|
| 320 |
+
'typical_positive_ratio': 0.976601,
|
| 321 |
+
'keep_english_letter': False,
|
| 322 |
+
'charset_name': "IBM866",
|
| 323 |
+
'language': 'Russian',
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
Ibm855Model = {
|
| 327 |
+
'char_to_order_map': IBM855_char_to_order_map,
|
| 328 |
+
'precedence_matrix': RussianLangModel,
|
| 329 |
+
'typical_positive_ratio': 0.976601,
|
| 330 |
+
'keep_english_letter': False,
|
| 331 |
+
'charset_name': "IBM855",
|
| 332 |
+
'language': 'Russian',
|
| 333 |
+
}
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langgreekmodel.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is Mozilla Communicator client code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
# 255: Control characters that usually does not exist in any text
|
| 29 |
+
# 254: Carriage/Return
|
| 30 |
+
# 253: symbol (punctuation) that does not belong to word
|
| 31 |
+
# 252: 0 - 9
|
| 32 |
+
|
| 33 |
+
# Character Mapping Table:
|
| 34 |
+
Latin7_char_to_order_map = (
|
| 35 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 36 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 37 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 38 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 39 |
+
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
|
| 40 |
+
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
|
| 41 |
+
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
|
| 42 |
+
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
|
| 43 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
|
| 44 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
|
| 45 |
+
253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
|
| 46 |
+
253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0
|
| 47 |
+
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
|
| 48 |
+
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
|
| 49 |
+
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
|
| 50 |
+
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
win1253_char_to_order_map = (
|
| 54 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 55 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 56 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 57 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 58 |
+
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
|
| 59 |
+
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
|
| 60 |
+
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
|
| 61 |
+
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
|
| 62 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
|
| 63 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
|
| 64 |
+
253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
|
| 65 |
+
253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0
|
| 66 |
+
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
|
| 67 |
+
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
|
| 68 |
+
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
|
| 69 |
+
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
# Model Table:
|
| 73 |
+
# total sequences: 100%
|
| 74 |
+
# first 512 sequences: 98.2851%
|
| 75 |
+
# first 1024 sequences:1.7001%
|
| 76 |
+
# rest sequences: 0.0359%
|
| 77 |
+
# negative sequences: 0.0148%
|
| 78 |
+
GreekLangModel = (
|
| 79 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 80 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 81 |
+
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
|
| 82 |
+
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 83 |
+
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
|
| 84 |
+
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
| 85 |
+
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
|
| 86 |
+
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 87 |
+
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
|
| 88 |
+
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 89 |
+
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
|
| 90 |
+
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
| 91 |
+
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
|
| 92 |
+
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 93 |
+
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
|
| 94 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 95 |
+
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
|
| 96 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 97 |
+
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
|
| 98 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 99 |
+
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
|
| 100 |
+
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 101 |
+
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
|
| 102 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 103 |
+
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
|
| 104 |
+
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 105 |
+
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
|
| 106 |
+
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 107 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 108 |
+
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 109 |
+
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
|
| 110 |
+
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 111 |
+
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
|
| 112 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 113 |
+
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
|
| 114 |
+
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 115 |
+
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
|
| 116 |
+
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 117 |
+
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
|
| 118 |
+
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 119 |
+
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
|
| 120 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 121 |
+
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
| 122 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 123 |
+
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
|
| 124 |
+
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 125 |
+
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
|
| 126 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 127 |
+
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
|
| 128 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 129 |
+
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
| 130 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 131 |
+
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
|
| 132 |
+
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 133 |
+
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
|
| 134 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 135 |
+
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
|
| 136 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 137 |
+
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
|
| 138 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 139 |
+
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
|
| 140 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 141 |
+
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
|
| 142 |
+
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
|
| 143 |
+
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
|
| 144 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 145 |
+
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
|
| 146 |
+
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
|
| 147 |
+
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
|
| 148 |
+
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
|
| 149 |
+
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
|
| 150 |
+
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
| 151 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 152 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 153 |
+
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
|
| 154 |
+
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
| 155 |
+
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
|
| 156 |
+
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 157 |
+
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
|
| 158 |
+
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
|
| 159 |
+
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
| 160 |
+
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
| 161 |
+
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
|
| 162 |
+
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
| 163 |
+
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
|
| 164 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 165 |
+
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 166 |
+
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
|
| 167 |
+
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
|
| 168 |
+
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
| 169 |
+
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 170 |
+
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
|
| 171 |
+
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
|
| 172 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 173 |
+
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
|
| 174 |
+
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
|
| 175 |
+
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
|
| 176 |
+
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
| 177 |
+
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
|
| 178 |
+
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
|
| 179 |
+
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
|
| 180 |
+
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
| 181 |
+
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
|
| 182 |
+
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
| 183 |
+
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
|
| 184 |
+
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 185 |
+
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
|
| 186 |
+
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
| 187 |
+
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
| 188 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 189 |
+
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 190 |
+
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
|
| 191 |
+
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
|
| 192 |
+
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 193 |
+
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
| 194 |
+
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
| 195 |
+
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 196 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
|
| 197 |
+
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 198 |
+
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 199 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 200 |
+
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
|
| 201 |
+
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
|
| 202 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 203 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 204 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 205 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 206 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 207 |
+
)
|
| 208 |
+
|
| 209 |
+
Latin7GreekModel = {
|
| 210 |
+
'char_to_order_map': Latin7_char_to_order_map,
|
| 211 |
+
'precedence_matrix': GreekLangModel,
|
| 212 |
+
'typical_positive_ratio': 0.982851,
|
| 213 |
+
'keep_english_letter': False,
|
| 214 |
+
'charset_name': "ISO-8859-7",
|
| 215 |
+
'language': 'Greek',
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
Win1253GreekModel = {
|
| 219 |
+
'char_to_order_map': win1253_char_to_order_map,
|
| 220 |
+
'precedence_matrix': GreekLangModel,
|
| 221 |
+
'typical_positive_ratio': 0.982851,
|
| 222 |
+
'keep_english_letter': False,
|
| 223 |
+
'charset_name': "windows-1253",
|
| 224 |
+
'language': 'Greek',
|
| 225 |
+
}
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langthaimodel.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is Mozilla Communicator client code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
# 255: Control characters that usually does not exist in any text
|
| 29 |
+
# 254: Carriage/Return
|
| 30 |
+
# 253: symbol (punctuation) that does not belong to word
|
| 31 |
+
# 252: 0 - 9
|
| 32 |
+
|
| 33 |
+
# The following result for thai was collected from a limited sample (1M).
|
| 34 |
+
|
| 35 |
+
# Character Mapping Table:
|
| 36 |
+
TIS620CharToOrderMap = (
|
| 37 |
+
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
| 38 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
| 39 |
+
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
| 40 |
+
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
| 41 |
+
253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40
|
| 42 |
+
188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50
|
| 43 |
+
253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60
|
| 44 |
+
96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70
|
| 45 |
+
209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
|
| 46 |
+
223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
|
| 47 |
+
236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
|
| 48 |
+
49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
|
| 49 |
+
45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
|
| 50 |
+
22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
|
| 51 |
+
11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
|
| 52 |
+
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
# Model Table:
|
| 56 |
+
# total sequences: 100%
|
| 57 |
+
# first 512 sequences: 92.6386%
|
| 58 |
+
# first 1024 sequences:7.3177%
|
| 59 |
+
# rest sequences: 1.0230%
|
| 60 |
+
# negative sequences: 0.0436%
|
| 61 |
+
ThaiLangModel = (
|
| 62 |
+
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
|
| 63 |
+
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
|
| 64 |
+
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
|
| 65 |
+
0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
| 66 |
+
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
|
| 67 |
+
3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
| 68 |
+
3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
|
| 69 |
+
3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
|
| 70 |
+
3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
|
| 71 |
+
3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
| 72 |
+
3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
|
| 73 |
+
2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
|
| 74 |
+
3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
|
| 75 |
+
0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
|
| 76 |
+
3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
|
| 77 |
+
0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
| 78 |
+
3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
|
| 79 |
+
1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
|
| 80 |
+
3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
|
| 81 |
+
3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
|
| 82 |
+
1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
|
| 83 |
+
0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
| 84 |
+
2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
|
| 85 |
+
0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
|
| 86 |
+
3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
|
| 87 |
+
2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
| 88 |
+
3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
|
| 89 |
+
0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 90 |
+
3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
|
| 91 |
+
3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
| 92 |
+
3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
|
| 93 |
+
2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
| 94 |
+
3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
|
| 95 |
+
2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 96 |
+
3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
|
| 97 |
+
3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
|
| 98 |
+
3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
|
| 99 |
+
3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 100 |
+
3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
|
| 101 |
+
3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 102 |
+
3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
|
| 103 |
+
3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 104 |
+
3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
|
| 105 |
+
1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 106 |
+
3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
|
| 107 |
+
0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 108 |
+
3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
|
| 109 |
+
0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
|
| 110 |
+
3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
|
| 111 |
+
3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 112 |
+
3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
|
| 113 |
+
1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
|
| 114 |
+
3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
|
| 115 |
+
3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 116 |
+
0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
| 117 |
+
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 118 |
+
0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
|
| 119 |
+
0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 120 |
+
0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
|
| 121 |
+
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 122 |
+
3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
|
| 123 |
+
1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 124 |
+
3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
|
| 125 |
+
1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
|
| 126 |
+
3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
|
| 127 |
+
0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
| 128 |
+
0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
|
| 129 |
+
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 130 |
+
3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
|
| 131 |
+
3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 132 |
+
3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
|
| 133 |
+
0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 134 |
+
3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
|
| 135 |
+
0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 136 |
+
3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
|
| 137 |
+
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 138 |
+
3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
|
| 139 |
+
0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
|
| 140 |
+
0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
|
| 141 |
+
0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 142 |
+
0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
| 143 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 144 |
+
1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
|
| 145 |
+
0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
| 146 |
+
3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
|
| 147 |
+
0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 148 |
+
3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
|
| 149 |
+
0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
| 150 |
+
3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
|
| 151 |
+
2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 152 |
+
1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
|
| 153 |
+
0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 154 |
+
3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 155 |
+
0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
|
| 156 |
+
3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
|
| 157 |
+
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 158 |
+
2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
| 159 |
+
2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 160 |
+
2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 161 |
+
0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 162 |
+
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 163 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 164 |
+
1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
|
| 165 |
+
1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 166 |
+
0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
|
| 167 |
+
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 168 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
| 169 |
+
0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 170 |
+
2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
|
| 171 |
+
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
| 172 |
+
1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
|
| 173 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 174 |
+
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 175 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
|
| 176 |
+
2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
| 177 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 178 |
+
2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
|
| 179 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 180 |
+
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 181 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
|
| 182 |
+
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 183 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
|
| 184 |
+
1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 185 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 186 |
+
0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 187 |
+
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 188 |
+
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 189 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
TIS620ThaiModel = {
|
| 193 |
+
'char_to_order_map': TIS620CharToOrderMap,
|
| 194 |
+
'precedence_matrix': ThaiLangModel,
|
| 195 |
+
'typical_positive_ratio': 0.926386,
|
| 196 |
+
'keep_english_letter': False,
|
| 197 |
+
'charset_name': "TIS-620",
|
| 198 |
+
'language': 'Thai',
|
| 199 |
+
}
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langturkishmodel.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 3 |
+
# The Original Code is Mozilla Communicator client code.
|
| 4 |
+
#
|
| 5 |
+
# The Initial Developer of the Original Code is
|
| 6 |
+
# Netscape Communications Corporation.
|
| 7 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 8 |
+
# the Initial Developer. All Rights Reserved.
|
| 9 |
+
#
|
| 10 |
+
# Contributor(s):
|
| 11 |
+
# Mark Pilgrim - port to Python
|
| 12 |
+
# Özgür Baskın - Turkish Language Model
|
| 13 |
+
#
|
| 14 |
+
# This library is free software; you can redistribute it and/or
|
| 15 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 16 |
+
# License as published by the Free Software Foundation; either
|
| 17 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 18 |
+
#
|
| 19 |
+
# This library is distributed in the hope that it will be useful,
|
| 20 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 21 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 22 |
+
# Lesser General Public License for more details.
|
| 23 |
+
#
|
| 24 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 25 |
+
# License along with this library; if not, write to the Free Software
|
| 26 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 27 |
+
# 02110-1301 USA
|
| 28 |
+
######################### END LICENSE BLOCK #########################
|
| 29 |
+
|
| 30 |
+
# 255: Control characters that usually does not exist in any text
|
| 31 |
+
# 254: Carriage/Return
|
| 32 |
+
# 253: symbol (punctuation) that does not belong to word
|
| 33 |
+
# 252: 0 - 9
|
| 34 |
+
|
| 35 |
+
# Character Mapping Table:
|
| 36 |
+
Latin5_TurkishCharToOrderMap = (
|
| 37 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
| 38 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
| 39 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
| 40 |
+
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
| 41 |
+
255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42,
|
| 42 |
+
48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255,
|
| 43 |
+
255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15,
|
| 44 |
+
26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255,
|
| 45 |
+
180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165,
|
| 46 |
+
164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106,
|
| 47 |
+
150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136,
|
| 48 |
+
94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125,
|
| 49 |
+
124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119,
|
| 50 |
+
68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86,
|
| 51 |
+
89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96,
|
| 52 |
+
30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107,
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
TurkishLangModel = (
|
| 56 |
+
3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3,
|
| 57 |
+
3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
| 58 |
+
3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3,
|
| 59 |
+
3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1,
|
| 60 |
+
3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3,
|
| 61 |
+
3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1,
|
| 62 |
+
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2,
|
| 63 |
+
2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,
|
| 64 |
+
3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2,
|
| 65 |
+
2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,
|
| 66 |
+
1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1,
|
| 67 |
+
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 68 |
+
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2,
|
| 69 |
+
3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1,
|
| 70 |
+
3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2,
|
| 71 |
+
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1,
|
| 72 |
+
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2,
|
| 73 |
+
2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,
|
| 74 |
+
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2,
|
| 75 |
+
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
| 76 |
+
3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3,
|
| 77 |
+
0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,
|
| 78 |
+
3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1,
|
| 79 |
+
0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,
|
| 80 |
+
3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1,
|
| 81 |
+
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1,
|
| 82 |
+
3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3,
|
| 83 |
+
2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
| 84 |
+
2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3,
|
| 85 |
+
2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
| 86 |
+
3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0,
|
| 87 |
+
0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0,
|
| 88 |
+
1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2,
|
| 89 |
+
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 90 |
+
3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1,
|
| 91 |
+
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,
|
| 92 |
+
3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2,
|
| 93 |
+
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 94 |
+
3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0,
|
| 95 |
+
0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0,
|
| 96 |
+
3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1,
|
| 97 |
+
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
| 98 |
+
3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1,
|
| 99 |
+
1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,
|
| 100 |
+
1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3,
|
| 101 |
+
2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1,
|
| 102 |
+
2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
|
| 103 |
+
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 104 |
+
3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1,
|
| 105 |
+
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
| 106 |
+
3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0,
|
| 107 |
+
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
| 108 |
+
3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0,
|
| 109 |
+
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
|
| 110 |
+
3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1,
|
| 111 |
+
1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
| 112 |
+
1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2,
|
| 113 |
+
0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1,
|
| 114 |
+
3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,
|
| 115 |
+
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 116 |
+
3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1,
|
| 117 |
+
0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,
|
| 118 |
+
3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2,
|
| 119 |
+
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,
|
| 120 |
+
3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,
|
| 121 |
+
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 122 |
+
0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2,
|
| 123 |
+
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 124 |
+
3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1,
|
| 125 |
+
0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0,
|
| 126 |
+
3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0,
|
| 127 |
+
0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
| 128 |
+
3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0,
|
| 129 |
+
0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0,
|
| 130 |
+
3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0,
|
| 131 |
+
0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
| 132 |
+
3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0,
|
| 133 |
+
0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0,
|
| 134 |
+
3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0,
|
| 135 |
+
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
| 136 |
+
0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1,
|
| 137 |
+
3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,
|
| 138 |
+
0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1,
|
| 139 |
+
0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,
|
| 140 |
+
3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
| 141 |
+
0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
| 142 |
+
3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0,
|
| 143 |
+
0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0,
|
| 144 |
+
3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0,
|
| 145 |
+
0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
| 146 |
+
3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0,
|
| 147 |
+
0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,
|
| 148 |
+
3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0,
|
| 149 |
+
0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
| 150 |
+
3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0,
|
| 151 |
+
0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
| 152 |
+
3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
| 153 |
+
0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,
|
| 154 |
+
0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0,
|
| 155 |
+
0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
| 156 |
+
2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0,
|
| 157 |
+
1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 158 |
+
3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0,
|
| 159 |
+
0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
| 160 |
+
0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1,
|
| 161 |
+
0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0,
|
| 162 |
+
3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0,
|
| 163 |
+
0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
| 164 |
+
0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,
|
| 165 |
+
2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,
|
| 166 |
+
2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0,
|
| 167 |
+
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
| 168 |
+
2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
| 169 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
| 170 |
+
1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
| 171 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,
|
| 172 |
+
0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0,
|
| 173 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
| 174 |
+
2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
| 175 |
+
0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 176 |
+
0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
| 177 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 178 |
+
2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,
|
| 179 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,
|
| 180 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 181 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 182 |
+
0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 183 |
+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
Latin5TurkishModel = {
|
| 187 |
+
'char_to_order_map': Latin5_TurkishCharToOrderMap,
|
| 188 |
+
'precedence_matrix': TurkishLangModel,
|
| 189 |
+
'typical_positive_ratio': 0.970290,
|
| 190 |
+
'keep_english_letter': True,
|
| 191 |
+
'charset_name': "ISO-8859-9",
|
| 192 |
+
'language': 'Turkish',
|
| 193 |
+
}
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/mbcsgroupprober.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is Mozilla Universal charset detector code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 2001
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
# Shy Shalom - original C code
|
| 12 |
+
# Proofpoint, Inc.
|
| 13 |
+
#
|
| 14 |
+
# This library is free software; you can redistribute it and/or
|
| 15 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 16 |
+
# License as published by the Free Software Foundation; either
|
| 17 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 18 |
+
#
|
| 19 |
+
# This library is distributed in the hope that it will be useful,
|
| 20 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 21 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 22 |
+
# Lesser General Public License for more details.
|
| 23 |
+
#
|
| 24 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 25 |
+
# License along with this library; if not, write to the Free Software
|
| 26 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 27 |
+
# 02110-1301 USA
|
| 28 |
+
######################### END LICENSE BLOCK #########################
|
| 29 |
+
|
| 30 |
+
from .charsetgroupprober import CharSetGroupProber
|
| 31 |
+
from .utf8prober import UTF8Prober
|
| 32 |
+
from .sjisprober import SJISProber
|
| 33 |
+
from .eucjpprober import EUCJPProber
|
| 34 |
+
from .gb2312prober import GB2312Prober
|
| 35 |
+
from .euckrprober import EUCKRProber
|
| 36 |
+
from .cp949prober import CP949Prober
|
| 37 |
+
from .big5prober import Big5Prober
|
| 38 |
+
from .euctwprober import EUCTWProber
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class MBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles all of the multi-byte charset probers.

    Instantiates one prober per supported multi-byte encoding (UTF-8,
    Shift-JIS, EUC-JP, GB2312, EUC-KR, CP949, Big5, EUC-TW) and lets the
    ``CharSetGroupProber`` machinery feed data to each and pick a winner.
    """

    def __init__(self, lang_filter=None):
        super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
        # One instance of each multi-byte prober, in the group's probe order.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self.probers = [prober_cls() for prober_cls in prober_classes]
        self.reset()
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sbcsgroupprober.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is Mozilla Universal charset detector code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 2001
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
# Shy Shalom - original C code
|
| 12 |
+
#
|
| 13 |
+
# This library is free software; you can redistribute it and/or
|
| 14 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 15 |
+
# License as published by the Free Software Foundation; either
|
| 16 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 17 |
+
#
|
| 18 |
+
# This library is distributed in the hope that it will be useful,
|
| 19 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 20 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 21 |
+
# Lesser General Public License for more details.
|
| 22 |
+
#
|
| 23 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 24 |
+
# License along with this library; if not, write to the Free Software
|
| 25 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 26 |
+
# 02110-1301 USA
|
| 27 |
+
######################### END LICENSE BLOCK #########################
|
| 28 |
+
|
| 29 |
+
from .charsetgroupprober import CharSetGroupProber
|
| 30 |
+
from .sbcharsetprober import SingleByteCharSetProber
|
| 31 |
+
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
|
| 32 |
+
Latin5CyrillicModel, MacCyrillicModel,
|
| 33 |
+
Ibm866Model, Ibm855Model)
|
| 34 |
+
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
| 35 |
+
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
| 36 |
+
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
| 37 |
+
from .langthaimodel import TIS620ThaiModel
|
| 38 |
+
from .langhebrewmodel import Win1255HebrewModel
|
| 39 |
+
from .hebrewprober import HebrewProber
|
| 40 |
+
from .langturkishmodel import Latin5TurkishModel
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class SBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles all of the single-byte charset probers.

    Most members are plain ``SingleByteCharSetProber`` instances built from a
    language model; Hebrew is special-cased because logical vs. visual Hebrew
    needs a coordinating ``HebrewProber`` wired to two model probers.
    """

    def __init__(self):
        super(SBCSGroupProber, self).__init__()
        # Plain single-byte probers, one per language model, in probe order.
        model_sequence = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
            # after we retrain model.
            # Latin2HungarianModel,
            # Win1250HungarianModel,
            TIS620ThaiModel,
            Latin5TurkishModel,
        )
        self.probers = [SingleByteCharSetProber(model) for model in model_sequence]

        # Hebrew needs a dedicated prober that arbitrates between the
        # logical (reversed=False) and visual (reversed=True) orderings,
        # both driven by the same windows-1255 model.
        hebrew = HebrewProber()
        logical = SingleByteCharSetProber(Win1255HebrewModel, False, hebrew)
        visual = SingleByteCharSetProber(Win1255HebrewModel, True, hebrew)
        hebrew.set_model_probers(logical, visual)
        self.probers.extend([hebrew, logical, visual])

        self.reset()
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sjisprober.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is mozilla.org code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
#
|
| 12 |
+
# This library is free software; you can redistribute it and/or
|
| 13 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 14 |
+
# License as published by the Free Software Foundation; either
|
| 15 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 16 |
+
#
|
| 17 |
+
# This library is distributed in the hope that it will be useful,
|
| 18 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 19 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 20 |
+
# Lesser General Public License for more details.
|
| 21 |
+
#
|
| 22 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 23 |
+
# License along with this library; if not, write to the Free Software
|
| 24 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 25 |
+
# 02110-1301 USA
|
| 26 |
+
######################### END LICENSE BLOCK #########################
|
| 27 |
+
|
| 28 |
+
from .mbcharsetprober import MultiByteCharSetProber
|
| 29 |
+
from .codingstatemachine import CodingStateMachine
|
| 30 |
+
from .chardistribution import SJISDistributionAnalysis
|
| 31 |
+
from .jpcntx import SJISContextAnalysis
|
| 32 |
+
from .mbcssm import SJIS_SM_MODEL
|
| 33 |
+
from .enums import ProbingState, MachineState
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class SJISProber(MultiByteCharSetProber):
    """Prober for the Shift-JIS family of Japanese encodings.

    Combines three signals: a coding state machine that validates byte
    sequences against the SJIS model, a character-distribution analyzer,
    and a two-character context analyzer. Confidence is the max of the
    latter two.
    """

    def __init__(self):
        super(SJISProber, self).__init__()
        self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
        self.distribution_analyzer = SJISDistributionAnalysis()
        self.context_analyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Reset prober state, including the context analyzer (the base
        class resets the state machine and distribution analyzer)."""
        super(SJISProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        # Delegated: the context analyzer distinguishes SHIFT_JIS vs CP932.
        return self.context_analyzer.charset_name

    @property
    def language(self):
        return "Japanese"

    def feed(self, byte_str):
        """Feed a chunk of bytes through the state machine and analyzers.

        Returns the resulting ProbingState. A state-machine ERROR rules the
        encoding out (NOT_ME); ITS_ME confirms it (FOUND_IT); on each
        completed character (state START) the last 1-2 bytes are passed to
        the distribution and context analyzers.
        """
        for i in range(len(byte_str)):
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # A complete character just finished; char_len is its width
                # in bytes (1 or 2 for SJIS).
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # Character straddles the chunk boundary: combine the
                    # byte carried over in _last_char with the first byte of
                    # this chunk. (_last_char appears to be a 2-slot buffer
                    # managed by MultiByteCharSetProber — confirm there.)
                    self._last_char[1] = byte_str[0]
                    self.context_analyzer.feed(self._last_char[2 - char_len:],
                                               char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    # Character ended at byte i; slice out its char_len bytes
                    # from the current chunk for each analyzer.
                    self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
                                                        - char_len], char_len)
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        # Remember the final byte in case the next chunk starts mid-character.
        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            # Shortcut: declare victory early once confidence is high enough.
            if (self.context_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the stronger of the context and distribution confidences."""
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/universaldetector.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################## BEGIN LICENSE BLOCK ########################
|
| 2 |
+
# The Original Code is Mozilla Universal charset detector code.
|
| 3 |
+
#
|
| 4 |
+
# The Initial Developer of the Original Code is
|
| 5 |
+
# Netscape Communications Corporation.
|
| 6 |
+
# Portions created by the Initial Developer are Copyright (C) 2001
|
| 7 |
+
# the Initial Developer. All Rights Reserved.
|
| 8 |
+
#
|
| 9 |
+
# Contributor(s):
|
| 10 |
+
# Mark Pilgrim - port to Python
|
| 11 |
+
# Shy Shalom - original C code
|
| 12 |
+
#
|
| 13 |
+
# This library is free software; you can redistribute it and/or
|
| 14 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 15 |
+
# License as published by the Free Software Foundation; either
|
| 16 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 17 |
+
#
|
| 18 |
+
# This library is distributed in the hope that it will be useful,
|
| 19 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 20 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 21 |
+
# Lesser General Public License for more details.
|
| 22 |
+
#
|
| 23 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 24 |
+
# License along with this library; if not, write to the Free Software
|
| 25 |
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
| 26 |
+
# 02110-1301 USA
|
| 27 |
+
######################### END LICENSE BLOCK #########################
|
| 28 |
+
"""
|
| 29 |
+
Module containing the UniversalDetector detector class, which is the primary
|
| 30 |
+
class a user of ``chardet`` should use.
|
| 31 |
+
|
| 32 |
+
:author: Mark Pilgrim (initial port to Python)
|
| 33 |
+
:author: Shy Shalom (original C code)
|
| 34 |
+
:author: Dan Blanchard (major refactoring for 3.0)
|
| 35 |
+
:author: Ian Cordasco
|
| 36 |
+
"""
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
import codecs
|
| 40 |
+
import logging
|
| 41 |
+
import re
|
| 42 |
+
|
| 43 |
+
from .charsetgroupprober import CharSetGroupProber
|
| 44 |
+
from .enums import InputState, LanguageFilter, ProbingState
|
| 45 |
+
from .escprober import EscCharSetProber
|
| 46 |
+
from .latin1prober import Latin1Prober
|
| 47 |
+
from .mbcsgroupprober import MBCSGroupProber
|
| 48 |
+
from .sbcsgroupprober import SBCSGroupProber
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class UniversalDetector(object):
    """
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    """

    # Probers whose confidence does not exceed this are ignored in close().
    MINIMUM_THRESHOLD = 0.20
    HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
    # ESC byte (ISO-2022 escape sequences) or '~{' (HZ-GB-2312 shift-in).
    ESC_DETECTOR = re.compile(b'(\033|~{)')
    # C1 control range: used by Windows code pages but unassigned in ISO-8859.
    WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
    ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
                   'iso-8859-2': 'Windows-1250',
                   'iso-8859-5': 'Windows-1251',
                   'iso-8859-6': 'Windows-1256',
                   'iso-8859-7': 'Windows-1253',
                   'iso-8859-8': 'Windows-1255',
                   'iso-8859-9': 'Windows-1254',
                   'iso-8859-13': 'Windows-1257'}

    def __init__(self, lang_filter=LanguageFilter.ALL):
        # All mutable state is (re)initialized by reset(); attributes are
        # declared here so the instance shape is stable.
        self._esc_charset_prober = None
        self._charset_probers = []
        self.result = None
        self.done = None
        self._got_data = None
        self._input_state = None
        self._last_char = None
        self.lang_filter = lang_filter
        self.logger = logging.getLogger(__name__)
        self._has_win_bytes = None
        self.reset()

    def reset(self):
        """
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        """
        self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
        self.done = False
        self._got_data = False
        self._has_win_bytes = False
        self._input_state = InputState.PURE_ASCII
        self._last_char = b''
        if self._esc_charset_prober:
            self._esc_charset_prober.reset()
        for prober in self._charset_probers:
            prober.reset()

    def feed(self, byte_str):
        """
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        """
        if self.done:
            return

        if not len(byte_str):
            return

        if not isinstance(byte_str, bytearray):
            byte_str = bytearray(byte_str)

        # First check for known BOMs, since these are guaranteed to be correct
        if not self._got_data:
            # If the data starts with BOM, we know it is UTF
            if byte_str.startswith(codecs.BOM_UTF8):
                # EF BB BF  UTF-8 with BOM
                self.result = {'encoding': "UTF-8-SIG",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith((codecs.BOM_UTF32_LE,
                                      codecs.BOM_UTF32_BE)):
                # FF FE 00 00  UTF-32, little-endian BOM
                # 00 00 FE FF  UTF-32, big-endian BOM
                self.result = {'encoding': "UTF-32",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
                self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
                self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
                # FF FE  UTF-16, little endian BOM
                # FE FF  UTF-16, big endian BOM
                self.result = {'encoding': "UTF-16",
                               'confidence': 1.0,
                               'language': ''}

            self._got_data = True
            if self.result['encoding'] is not None:
                self.done = True
                return

        # If none of those matched and we've only see ASCII so far, check
        # for high bytes and escape sequences
        if self._input_state == InputState.PURE_ASCII:
            if self.HIGH_BYTE_DETECTOR.search(byte_str):
                self._input_state = InputState.HIGH_BYTE
            # _last_char is prepended so an escape sequence split across two
            # feed() calls is still detected.
            elif self.ESC_DETECTOR.search(self._last_char + byte_str):
                self._input_state = InputState.ESC_ASCII

        self._last_char = byte_str[-1:]

        # If we've seen escape sequences, use the EscCharSetProber, which
        # uses a simple state machine to check for known escape sequences in
        # HZ and ISO-2022 encodings, since those are the only encodings that
        # use such sequences.
        if self._input_state == InputState.ESC_ASCII:
            if not self._esc_charset_prober:
                self._esc_charset_prober = EscCharSetProber(self.lang_filter)
            if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
                self.result = {'encoding':
                               self._esc_charset_prober.charset_name,
                               'confidence':
                               self._esc_charset_prober.get_confidence(),
                               'language':
                               self._esc_charset_prober.language}
                self.done = True
        # If we've seen high bytes (i.e., those with values greater than 127),
        # we need to do more complicated checks using all our multi-byte and
        # single-byte probers that are left.  The single-byte probers
        # use character bigram distributions to determine the encoding, whereas
        # the multi-byte probers use a combination of character unigram and
        # bigram distributions.
        elif self._input_state == InputState.HIGH_BYTE:
            if not self._charset_probers:
                self._charset_probers = [MBCSGroupProber(self.lang_filter)]
                # If we're checking non-CJK encodings, use single-byte prober
                if self.lang_filter & LanguageFilter.NON_CJK:
                    self._charset_probers.append(SBCSGroupProber())
                self._charset_probers.append(Latin1Prober())
            for prober in self._charset_probers:
                if prober.feed(byte_str) == ProbingState.FOUND_IT:
                    self.result = {'encoding': prober.charset_name,
                                   'confidence': prober.get_confidence(),
                                   'language': prober.language}
                    self.done = True
                    break
            if self.WIN_BYTE_DETECTOR.search(byte_str):
                self._has_win_bytes = True

    def close(self):
        """
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        """
        # Don't bother with checks if we're already done
        if self.done:
            return self.result
        self.done = True

        if not self._got_data:
            self.logger.debug('no data received!')

        # Default to ASCII if it is all we've seen so far
        elif self._input_state == InputState.PURE_ASCII:
            self.result = {'encoding': 'ascii',
                           'confidence': 1.0,
                           'language': ''}

        # If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
        elif self._input_state == InputState.HIGH_BYTE:
            prober_confidence = None
            max_prober_confidence = 0.0
            max_prober = None
            for prober in self._charset_probers:
                if not prober:
                    continue
                prober_confidence = prober.get_confidence()
                if prober_confidence > max_prober_confidence:
                    max_prober_confidence = prober_confidence
                    max_prober = prober
            if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
                charset_name = max_prober.charset_name
                lower_charset_name = max_prober.charset_name.lower()
                confidence = max_prober.get_confidence()
                # Use Windows encoding name instead of ISO-8859 if we saw any
                # extra Windows-specific bytes
                if lower_charset_name.startswith('iso-8859'):
                    if self._has_win_bytes:
                        charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
                                                            charset_name)
                self.result = {'encoding': charset_name,
                               'confidence': confidence,
                               'language': max_prober.language}

        # Log all prober confidences if none met MINIMUM_THRESHOLD
        if self.logger.getEffectiveLevel() == logging.DEBUG:
            if self.result['encoding'] is None:
                self.logger.debug('no probers hit minimum threshold')
                for group_prober in self._charset_probers:
                    if not group_prober:
                        continue
                    if isinstance(group_prober, CharSetGroupProber):
                        for prober in group_prober.probers:
                            self.logger.debug('%s %s confidence = %s',
                                              prober.charset_name,
                                              prober.language,
                                              prober.get_confidence())
                    else:
                        # BUGFIX: this branch previously logged ``prober``, a
                        # stale binding left over from the confidence loop
                        # above, instead of the prober actually being iterated.
                        self.logger.debug('%s %s confidence = %s',
                                          group_prober.charset_name,
                                          group_prober.language,
                                          group_prober.get_confidence())
        return self.result
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Package release version for conda-package-handling (exposed to the CLI
# via ``--version``).
__version__ = "2.2.0"
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/cli.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
from pprint import pprint
|
| 5 |
+
|
| 6 |
+
from . import __version__, api
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def parse_args(parse_this=None):
    """Parse CLI arguments and return the resulting namespace.

    :param parse_this: explicit argument list to parse; ``None`` means
        argparse falls back to ``sys.argv[1:]``.
    """
    return build_parser().parse_args(parse_this)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def build_parser():
    """Construct the argument parser for the conda-package-handling CLI.

    Defines three subcommands -- ``extract``/``x``, ``create``/``c`` and
    ``transmute``/``t`` -- and returns the fully configured parser.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-V",
        "--version",
        action="version",
        help="Show the conda-package-handling version number and exit.",
        version=f"conda-package-handling {__version__}",
    )
    subcommands = cli.add_subparsers(title="subcommands", dest="subcommand", required=True)

    # extract: unpack an existing archive.
    extract_cmd = subcommands.add_parser("extract", help="extract package contents", aliases=["x"])
    extract_cmd.add_argument("archive_path", help="path to archive to extract")
    extract_cmd.add_argument(
        "--dest",
        help="destination folder to extract to. If not set, defaults to"
        " package filename minus extension in the same folder as the input archive."
        " May be relative path used in tandem with the --prefix flag.",
    )
    extract_cmd.add_argument(
        "--prefix",
        help="base directory to extract to. Use this to set the base"
        " directory, while allowing the folder name to be automatically determined "
        "by the input filename. An abspath --prefix with an unset --dest will "
        "achieve this.",
    )
    extract_cmd.add_argument(
        "--info",
        help="If the archive supports separate metadata, this"
        " flag extracts only the metadata in the info folder from the "
        "package. If the archive does not support separate metadata, this "
        "flag has no effect and all files are extracted.",
        action="store_true",
    )

    # create: bundle a prefix into a new archive.
    create_cmd = subcommands.add_parser("create", help="bundle files into a package", aliases=["c"])
    create_cmd.add_argument(
        "prefix",
        help="folder of files to bundle. Not strictly required to"
        " have conda package metadata, but if conda package metadata isn't "
        "present, you'll see a warning and your file will not work as a "
        "conda package",
    )
    create_cmd.add_argument(
        "out_fn",
        help="Filename of archive to be created. Extension determines package type.",
    )
    create_cmd.add_argument(
        "--file-list",
        help="Path to file containing one relative path per"
        " line that should be included in the archive. If not provided, "
        "lists all files in the prefix.",
    )
    create_cmd.add_argument("--out-folder", help="Folder to dump final archive to")

    # transmute: convert between archive formats (.tar.bz2 <-> .conda).
    transmute_cmd = subcommands.add_parser(
        "transmute", help="convert from one package type to another", aliases=["t"]
    )
    transmute_cmd.add_argument(
        "in_file", help="existing file to convert from. Glob patterns accepted."
    )
    transmute_cmd.add_argument(
        "out_ext",
        help="extension of file to convert to. Examples: .tar.bz2, .conda",
    )
    transmute_cmd.add_argument("--out-folder", help="Folder to dump final archive to")
    transmute_cmd.add_argument(
        "--force", action="store_true", help="Force overwrite existing package"
    )
    transmute_cmd.add_argument(
        "--processes",
        type=int,
        help="Max number of processes to use. If not set, defaults to 1.",
    )
    transmute_cmd.add_argument(
        "--zstd-compression-level",
        help=(
            "When building v2 packages, set the compression level used by "
            "conda-package-handling. Defaults to 19."
        ),
        type=int,
        choices=range(1, 23),
        default=19,
    )
    transmute_cmd.add_argument(
        "--zstd-compression-threads",
        help=(
            "When building v2 packages, set the compression threads used by "
            "conda-package-handling. Defaults to 1. -1=automatic."
        ),
        type=int,
        default=1,
    )

    return cli
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def main(args=None):
    """Command-line entry point for conda-package-handling.

    :param args: argument list to parse; ``None`` means ``sys.argv[1:]``.
    Exits with status 1 if a transmute reports failed files.
    """
    args = parse_args(args)
    # Normalize the output folder to an absolute, user-expanded path ending
    # in a separator (only the create/transmute subcommands define it).
    if getattr(args, "out_folder", None):
        expanded = os.path.expanduser(args.out_folder)
        args.out_folder = os.path.abspath(os.path.normpath(expanded)) + os.sep

    if args.subcommand in ("extract", "x"):
        extract_kwargs = {"prefix": args.prefix}
        if args.info:
            # Only pull the metadata (info/) component out of the archive.
            extract_kwargs["components"] = "info"
        api.extract(args.archive_path, args.dest, **extract_kwargs)
    elif args.subcommand in ("create", "c"):
        api.create(args.prefix, args.file_list, args.out_fn, args.out_folder)
    elif args.subcommand in ("transmute", "t"):
        failed_files = api.transmute(
            args.in_file,
            args.out_ext,
            args.out_folder,
            args.processes or 1,
            force=args.force,
            zstd_compress_level=args.zstd_compression_level,
            zstd_compress_threads=args.zstd_compression_threads,
        )
        if failed_files:
            print("failed files:")
            pprint(failed_files)
            sys.exit(1)


if __name__ == "__main__":  # pragma: no cover
    main(args=None)
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/conda_fmt.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
The 'new' conda format, introduced in late 2018/early 2019.
|
| 3 |
+
|
| 4 |
+
https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/packages.html
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import json
|
| 10 |
+
import os
|
| 11 |
+
import tarfile
|
| 12 |
+
from typing import Callable
|
| 13 |
+
from zipfile import ZIP_STORED, ZipFile
|
| 14 |
+
|
| 15 |
+
import zstandard
|
| 16 |
+
|
| 17 |
+
from . import utils
|
| 18 |
+
from .interface import AbstractBaseFormat
|
| 19 |
+
from .streaming import _extract
|
| 20 |
+
|
| 21 |
+
CONDA_PACKAGE_FORMAT_VERSION = 2
|
| 22 |
+
DEFAULT_COMPRESSION_TUPLE = (".tar.zst", "zstd", "zstd:compression-level=19")
|
| 23 |
+
|
| 24 |
+
# increase to reduce speed and increase compression (22 = conda's default)
|
| 25 |
+
ZSTD_COMPRESS_LEVEL = 19
|
| 26 |
+
# increase to reduce compression (slightly) and increase speed
|
| 27 |
+
ZSTD_COMPRESS_THREADS = 1
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class CondaFormat_v2(AbstractBaseFormat):
    """Handler for the v2 ``.conda`` package format: a ZIP container holding
    ``metadata.json`` plus zstd-compressed ``info-`` and ``pkg-`` tarballs.

    If there's another conda format or breaking changes, please create a new
    class and keep this one, so that handling of v2 stays working.
    """

    @staticmethod
    def supported(fn):
        """Return True if *fn* names a v2 package (``.conda`` extension)."""
        return fn.endswith(".conda")

    @staticmethod
    def extract(fn, dest_dir, **kw):
        """Extract components of package *fn* into *dest_dir*.

        ``kw["components"]`` may restrict extraction to a subset of
        ``("info", "pkg")``; both are extracted by default.  *dest_dir* is
        created if missing.
        """
        components = utils.ensure_list(kw.get("components")) or ("info", "pkg")
        if not os.path.isabs(fn):
            fn = os.path.normpath(os.path.join(os.getcwd(), fn))
        if not os.path.isdir(dest_dir):
            os.makedirs(dest_dir)

        _extract(str(fn), str(dest_dir), components=components)

    @staticmethod
    def extract_info(fn, dest_dir=None):
        """Extract only the ``info`` component of *fn*."""
        return CondaFormat_v2.extract(fn, dest_dir, components=["info"])

    @staticmethod
    def create(
        prefix,
        file_list,
        out_fn,
        out_folder=None,
        compressor: Callable[[], zstandard.ZstdCompressor] | None = None,
        compression_tuple=(None, None, None),
    ):
        """Build ``<out_folder>/<out_fn>`` from *file_list* rooted at *prefix*.

        *compressor* is a zero-argument factory returning a
        ``zstandard.ZstdCompressor``; *compression_tuple* is the deprecated
        libarchive-style alternative.  Supplying both raises ``ValueError``.
        Returns the path of the created ``.conda`` file.
        """
        # BUG FIX: ``out_folder`` previously defaulted to ``os.getcwd()``,
        # which is evaluated once at import time; resolve the current working
        # directory at call time instead.
        if out_folder is None:
            out_folder = os.getcwd()
        if os.path.isabs(out_fn):
            out_folder = os.path.dirname(out_fn)
            out_fn = os.path.basename(out_fn)
        conda_pkg_fn = os.path.join(out_folder, out_fn)
        file_id = out_fn.replace(".conda", "")
        pkg_files = utils.filter_info_files(file_list, prefix)
        # preserve order
        pkg_files_set = set(pkg_files)
        info_files = [f for f in file_list if f not in pkg_files_set]

        if compressor and (compression_tuple != (None, None, None)):
            raise ValueError("Supply one of compressor= or (deprecated) compression_tuple=")

        if compressor is None:
            compressor = lambda: zstandard.ZstdCompressor(
                level=ZSTD_COMPRESS_LEVEL,
                threads=ZSTD_COMPRESS_THREADS,
            )

        # legacy libarchive-ish compatibility: honor a
        # "zstd:compression-level=N" filter string if one was passed.
        ext, comp_filter, filter_opts = compression_tuple
        if filter_opts and filter_opts.startswith("zstd:compression-level="):
            compressor = lambda: zstandard.ZstdCompressor(
                level=int(filter_opts.split("=", 1)[-1]),
                threads=ZSTD_COMPRESS_THREADS,
            )

        class NullWriter:
            """
            Byte-counting sink: zstd uses less memory on extract if size is known.
            """

            def __init__(self):
                self.size = 0

            def write(self, bytes):
                self.size += len(bytes)
                return len(bytes)

            def tell(self):
                return self.size

        with ZipFile(conda_pkg_fn, "w", compression=ZIP_STORED) as conda_file, utils.tmp_chdir(
            prefix
        ):
            pkg_metadata = {"conda_pkg_format_version": CONDA_PACKAGE_FORMAT_VERSION}
            conda_file.writestr("metadata.json", json.dumps(pkg_metadata))

            components_files = (f"pkg-{file_id}.tar.zst", pkg_files), (
                f"info-{file_id}.tar.zst",
                info_files,
            )

            # put the info last, for parity with updated transmute.
            compress = compressor()
            for component, files in components_files:
                # First pass: measure the uncompressed tar so the real write can
                # pass an exact size.  If size is known, the decompressor may be
                # able to allocate less memory.  The compressor will error if
                # size is not correct.
                with tarfile.TarFile(fileobj=NullWriter(), mode="w") as sizer:  # type: ignore
                    for file in files:
                        sizer.add(file, filter=utils.anonymize_tarinfo)
                    size = sizer.fileobj.size  # type: ignore

                with conda_file.open(component, "w") as component_file:
                    # only one stream_writer() per compressor() must be in use at a time
                    component_stream = compress.stream_writer(
                        component_file, size=size, closefd=False
                    )
                    component_tar = tarfile.TarFile(fileobj=component_stream, mode="w")

                    for file in files:
                        component_tar.add(file, filter=utils.anonymize_tarinfo)

                    component_tar.close()
                    component_stream.close()

        return conda_pkg_fn

    @staticmethod
    def get_pkg_details(in_file):
        """Return ``{"size", "md5", "sha256"}`` for the package file *in_file*."""
        stat_result = os.stat(in_file)
        size = stat_result.st_size
        md5, sha256 = utils.checksums(in_file, ("md5", "sha256"))
        return {"size": size, "md5": md5, "sha256": sha256}
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/interface.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import abc
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class AbstractBaseFormat(metaclass=abc.ABCMeta):
    """Abstract interface that every conda package-format handler implements."""

    @staticmethod
    @abc.abstractmethod
    def supported(fn):  # pragma: no cover
        """Return True if this handler recognizes the filename *fn*."""
        return False

    @staticmethod
    @abc.abstractmethod
    def extract(fn, dest_dir, **kw):  # pragma: no cover
        """Extract archive *fn* into *dest_dir*."""
        raise NotImplementedError

    @staticmethod
    @abc.abstractmethod
    def create(prefix, file_list, out_fn, out_folder=os.getcwd(), **kw):  # pragma: no cover
        """Create an archive named *out_fn* from *file_list* rooted at *prefix*."""
        raise NotImplementedError

    @staticmethod
    @abc.abstractmethod
    def get_pkg_details(in_file):  # pragma: no cover
        """Return a dict of metadata (size, checksums, ...) for *in_file*."""
        raise NotImplementedError
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/streaming.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Exception-compatible adapter from conda_package_streaming.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
from tarfile import TarError
|
| 8 |
+
from zipfile import BadZipFile
|
| 9 |
+
|
| 10 |
+
from conda_package_streaming.extract import exceptions as cps_exceptions
|
| 11 |
+
from conda_package_streaming.extract import extract_stream, package_streaming
|
| 12 |
+
|
| 13 |
+
from . import exceptions
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _extract(filename: str, dest_dir: str, components: list[str]):
    """
    Extract a .conda or .tar.bz2 package to *dest_dir*.

    For a .conda package, *components* may be ["pkg", "info"]; for a
    .tar.bz2 package it must equal ["pkg"].

    Internal adapter: translates conda-package-streaming exceptions into this
    package's exception types.  Skip directly to conda-package-streaming if
    you don't need exception compatibility.
    """

    if str(filename).endswith(".tar.bz2"):
        assert components == ["pkg"]

    try:
        with open(filename, "rb") as fileobj:
            for component in components:
                # will parse zipfile twice
                extract_stream(
                    package_streaming.stream_conda_component(
                        filename, fileobj, component=component
                    ),
                    dest_dir,
                )
    except cps_exceptions.CaseInsensitiveFileSystemError as e:
        raise exceptions.CaseInsensitiveFileSystemError(filename, dest_dir) from e
    except (OSError, TarError, BadZipFile) as e:
        raise exceptions.InvalidArchiveError(filename, f"failed with error: {str(e)}") from e
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/utils.py
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import contextlib
|
| 2 |
+
import fnmatch
|
| 3 |
+
import hashlib
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
import shutil
|
| 8 |
+
import sys
|
| 9 |
+
import warnings as _warnings
|
| 10 |
+
from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
|
| 11 |
+
from errno import EACCES, ENOENT, EPERM, EROFS
|
| 12 |
+
from itertools import chain
|
| 13 |
+
from os.path import (
|
| 14 |
+
abspath,
|
| 15 |
+
basename,
|
| 16 |
+
dirname,
|
| 17 |
+
isdir,
|
| 18 |
+
isfile,
|
| 19 |
+
islink,
|
| 20 |
+
join,
|
| 21 |
+
lexists,
|
| 22 |
+
normpath,
|
| 23 |
+
split,
|
| 24 |
+
)
|
| 25 |
+
from stat import S_IEXEC, S_IMODE, S_ISDIR, S_ISREG, S_IWRITE
|
| 26 |
+
from subprocess import STDOUT, CalledProcessError, check_output, list2cmdline
|
| 27 |
+
from tempfile import NamedTemporaryFile, mkdtemp
|
| 28 |
+
|
| 29 |
+
on_win = sys.platform == "win32"
|
| 30 |
+
log = logging.getLogger(__file__)
|
| 31 |
+
CONDA_TEMP_EXTENSION = ".c~"
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def which(executable):
    """Return the full path to *executable* on PATH, or None if not found.

    Uses :func:`shutil.which`; the previous implementation relied on
    ``distutils.spawn.find_executable``, which was deprecated and removed
    along with distutils in Python 3.12.
    """
    return shutil.which(executable)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def make_writable(path):
    """Best-effort: add the owner-write bit to *path*.

    Returns True on success, False when permissions forbid the change, and
    re-raises for missing files or unexpected errors.
    """
    try:
        mode = os.lstat(path).st_mode
        if S_ISDIR(mode):
            # directories also need the execute bit to be traversable
            os.chmod(path, S_IMODE(mode) | S_IWRITE | S_IEXEC)
        elif S_ISREG(mode) or islink(path):
            os.chmod(path, S_IMODE(mode) | S_IWRITE)
        else:
            log.debug("path cannot be made writable: %s", path)
        return True
    except Exception as e:
        eno = getattr(e, "errno", None)
        if eno in (ENOENT,):
            log.debug("tried to make writable, but didn't exist: %s", path)
            raise
        elif eno in (EACCES, EPERM, EROFS):
            log.debug("tried make writable but failed: %s\n%r", path, e)
            return False
        else:
            # BUG FIX: logging.Logger.warn is deprecated; use warning().
            log.warning("Error making path writable: %s\n%r", path, e)
            raise
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class DummyExecutor(Executor):
    """An Executor that runs every task serially in the calling thread."""

    def map(self, func, *iterables):
        # BUG FIX: the old implementation iterated each iterable independently
        # and called ``func`` with a single argument, which breaks the
        # ``Executor.map`` contract for multiple iterables: ``map(f, xs, ys)``
        # must call ``f(x, y)`` pairwise, like the builtin ``map``.
        for args in zip(*iterables):
            yield func(*args)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_executor(processes):
    """Return a serial DummyExecutor for a single process, else a process pool."""
    if processes == 1:
        return DummyExecutor()
    return ProcessPoolExecutor(max_workers=processes)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def recursive_make_writable(path):
    """Best-effort chmod+w of *path* and, if it is a directory, everything inside it."""
    # The need for this function was pointed out at
    # https://github.com/conda/conda/issues/3266#issuecomment-239241915
    # Especially on windows, file removal will often fail because it is marked read-only
    if isdir(path):
        for root, dirs, files in os.walk(path):
            # BUG FIX: the inner loop variable used to be named ``path``,
            # shadowing the function parameter.
            for entry in chain.from_iterable((files, dirs)):
                try:
                    make_writable(join(root, entry))
                except Exception:
                    # best-effort: narrowed from a bare ``except:`` so that
                    # KeyboardInterrupt/SystemExit still propagate
                    pass
    else:
        try:
            make_writable(path)
        except Exception:
            pass
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def quote_for_shell(arguments, shell=None):
    """Join *arguments* into one command-line string quoted for *shell*.

    Defaults to ``cmd.exe`` on Windows and ``bash`` elsewhere.  cmd.exe uses
    the stdlib ``list2cmdline``; POSIX-ish shells get simple per-argument
    quoting (good enough for the multiline/whitespace arguments this package
    produces, e.g. ``['python', '-c', 'a\\nmultiline\\nprogram\\n']``).
    """
    shell = shell or ("cmd.exe" if on_win else "bash")
    if shell == "cmd.exe":
        return list2cmdline(arguments)

    def _pick_quote(arg):
        # Contains a double quote -> wrap in single quotes; contains a single
        # quote -> wrap in double quotes; no whitespace/newline -> bare;
        # otherwise wrap in double quotes.
        if '"' in arg:
            return "'"
        if "'" in arg:
            return '"'
        if " " not in arg and "\n" not in arg:
            return ""
        return '"'

    return " ".join(f"{_pick_quote(arg)}{arg}{_pick_quote(arg)}" for arg in arguments)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def rmtree(path, *args, **kwargs):
    """Delete the directory tree *path*, trying fast platform-specific paths first.

    Extra positional/keyword arguments are accepted for signature
    compatibility but ignored.
    """
    # subprocessing to delete large folders can be quite a bit faster
    path = normpath(path)
    if on_win:
        try:
            # the fastest way seems to be using DEL to recursively delete files
            # https://www.ghacks.net/2017/07/18/how-to-delete-large-folders-in-windows-super-fast/
            # However, this is not entirely safe, as it can end up following symlinks to folders
            # https://superuser.com/a/306618/184799
            # so, we stick with the slower, but hopefully safer way. Maybe if we figured out how
            # to scan for any possible symlinks, we could do the faster way.
            # out = check_output('DEL /F/Q/S *.* > NUL 2> NUL'.format(path), shell=True,
            #                    stderr=STDOUT, cwd=path)

            out = check_output(f'RD /S /Q "{path}" > NUL 2> NUL', shell=True, stderr=STDOUT)
        except:
            try:
                # Try to delete in Unicode: write a .bat that switches the code
                # page to UTF-8 (chcp 65001) before retrying the RD.
                name = None

                # NOTE(review): NamedTemporaryFile defaults to binary mode, but
                # str is written below — this looks like it would raise
                # TypeError before the batch file is ever run; confirm intent.
                with NamedTemporaryFile(suffix=".bat", delete=False) as batch_file:
                    batch_file.write(f"RD /S {quote_for_shell([path])}\n")
                    batch_file.write("chcp 65001\n")
                    batch_file.write(f"RD /S {quote_for_shell([path])}\n")
                    batch_file.write("EXIT 0\n")
                    name = batch_file.name
                # If the above is bugged we can end up deleting hard-drives, so we check
                # that 'path' appears in it. This is not bulletproof but it could save you (me).
                with open(name) as contents:
                    content = contents.read()
                    assert path in content
                comspec = os.environ["COMSPEC"]
                CREATE_NO_WINDOW = 0x08000000
                # It is essential that we `pass stdout=None, stderr=None, stdin=None` here because
                # if we do not, then the standard console handles get attached and chcp affects the
                # parent process (and any which share those console handles!)
                out = check_output(
                    [comspec, "/d", "/c", name],
                    shell=False,
                    stdout=None,
                    stderr=None,
                    stdin=None,
                    creationflags=CREATE_NO_WINDOW,
                )

            except CalledProcessError as e:
                # returncode 5 == access denied; anything else is unexpected
                if e.returncode != 5:
                    # NOTE(review): ``out`` may be unbound here if check_output
                    # itself raised — confirm before relying on this log line.
                    log.error(f"Removing folder {path} the fast way failed. Output was: {out}")
                    raise
                else:
                    log.debug(f"removing dir contents the fast way failed. Output was: {out}")
    else:
        try:
            os.makedirs(".empty")
        except:
            pass
        # yes, this looks strange. See
        # https://unix.stackexchange.com/a/79656/34459
        # https://web.archive.org/web/20130929001850/http://linuxnote.net/jianingy/en/linux/a-fast-way-to-remove-huge-number-of-files.html # NOQA
        rsync = which("rsync")
        if rsync and isdir(".empty"):
            # rsync an empty dir over *path* — faster than rm for huge trees
            try:
                out = check_output(
                    [
                        rsync,
                        "-a",
                        "--force",
                        "--delete",
                        join(os.getcwd(), ".empty") + "/",
                        path + "/",
                    ],
                    stderr=STDOUT,
                )
            except CalledProcessError:
                # NOTE(review): ``out`` may be unbound here when check_output raised.
                log.debug(f"removing dir contents the fast way failed. Output was: {out}")
        shutil.rmtree(".empty")
    # final sweep with the stdlib implementation (also removes *path* itself)
    shutil.rmtree(path)
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def unlink_or_rename_to_trash(path):
    """If files are in use, especially on windows, we can't remove them.
    The fallback path is to rename them (but keep their folder the same),
    which maintains the file handle validity. See comments at:
    https://serverfault.com/a/503769
    """
    try:
        make_writable(path)
        os.unlink(path)
    except OSError:
        try:
            os.rename(path, path + ".conda_trash")
        except OSError:
            if on_win:
                # on windows, it is important to use the rename program, as just using python's
                # rename leads to permission errors when files are in use.
                # BUG FIX: the old code passed the NamedTemporaryFile *object*
                # to open() (a TypeError), wrote a batch line with a stray '")'
                # and no newlines, and called the non-existent ``str.splitext``.
                # Write the script through the text-mode temp file itself
                # (delete=False so cmd.exe can read it) and use
                # ``os.path.splitext`` for the collision counter.
                with NamedTemporaryFile(
                    mode="w", suffix=".bat", delete=False
                ) as trash_script:
                    trash_script.write('@pushd "%1"\n')
                    trash_script.write("@REM Rename src to dest\n")
                    trash_script.write('@ren "%2" "%3" > NUL 2> NUL\n')
                    script_name = trash_script.name

                _dirname, _fn = split(path)
                dest_fn = path + ".conda_trash"
                counter = 1
                # avoid clobbering an earlier trash file for the same path
                while isfile(dest_fn):
                    dest_fn = os.path.splitext(dest_fn)[0] + f".conda_trash_{counter}"
                    counter += 1
                out = "< empty >"
                try:
                    out = check_output(
                        [
                            "cmd.exe",
                            "/C",
                            script_name,
                            _dirname,
                            _fn,
                            basename(dest_fn),
                        ],
                        stderr=STDOUT,
                    )
                except CalledProcessError:
                    # BUG FIX: log.warn is deprecated; use log.warning.
                    log.warning(
                        "renaming file path {} to trash failed. Output was: {}".format(
                            path, out
                        )
                    )

            log.warning(
                "Could not remove or rename {}. Please remove this file manually (you "
                "may need to reboot to free file handles)".format(path)
            )
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def remove_empty_parent_paths(path):
    """Walk upward from *path*, removing each ancestor directory while it is empty."""
    # recurse to clean up empty folders that were created to have a nested hierarchy
    parent = dirname(path)
    while isdir(parent) and not os.listdir(parent):
        rmdir(parent)
        parent = dirname(parent)
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def rm_rf(path, clean_empty_parents=False, *args, **kw):
    """
    Completely delete path
    max_retries is the number of times to retry on failure. The default is 5. This only applies
    to deleting a directory.
    If removing path fails and trash is True, files will be moved to the trash directory.
    """
    # NOTE(review): *args/**kw (and the max_retries/trash options described in
    # the docstring) are accepted for backward compatibility but are not used
    # by this implementation.
    recursive_make_writable(path)
    try:
        path = abspath(path)
        if isdir(path) and not islink(path):
            rmdir(path)
        elif lexists(path):
            unlink_or_rename_to_trash(path)
        else:
            log.debug("rm_rf failed. Not a link, file, or directory: %s", path)
    finally:
        # NOTE(review): ``return False`` inside a ``finally`` block swallows any
        # exception raised above — failures surface only as a False return plus
        # the log message. Confirm this silent-failure contract is intended.
        if lexists(path):
            log.info("rm_rf failed for %s", path)
            return False
    # the deletion succeeded: sweep leftover *.conda_trash files and optionally
    # prune now-empty parent directories
    if isdir(path):
        delete_trash(path)
    if clean_empty_parents:
        remove_empty_parent_paths(path)
    return True
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
# aliases that all do the same thing (legacy compat)
# NOTE(review): each of these historical names is simply bound to rm_rf; the
# trash-/rmdir-specific behavior their names suggest happens inside rm_rf.
try_rmdir_all_empty = move_to_trash = move_path_to_trash = rm_rf
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def delete_trash(prefix):
    """Remove leftover ``*.conda_trash*`` and ``*.c~`` files under *prefix*.

    Falls back to ``sys.prefix`` when *prefix* is falsy; never descends into
    ``envs`` directories.  Unlink failures are logged at debug level only.
    """
    prefix = prefix or sys.prefix
    for root, dirs, files in os.walk(prefix, topdown=True):
        dirs[:] = [d for d in dirs if d != "envs"]
        for fn in files:
            is_trash = fnmatch.fnmatch(fn, "*.conda_trash*") or fnmatch.fnmatch(
                fn, "*" + CONDA_TEMP_EXTENSION
            )
            if not is_trash:
                continue
            filename = join(root, fn)
            try:
                os.unlink(filename)
                remove_empty_parent_paths(filename)
            except OSError as e:
                log.debug("%r errno %d\nCannot unlink %s.", e, e.errno, filename)
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def rmdir(dirpath):
    """Recursively delete *dirpath*, tolerating partial failure.

    Tries the fast ``rmtree`` first; anything it could not remove is then
    unlinked (or renamed to ``.conda_trash``) file by file.
    """
    if not isdir(dirpath):
        return
    try:
        rmtree(dirpath)
    # we don't really care about errors that much. We'll catch remaining files
    # with slower python logic.
    except Exception:
        # BUG FIX: narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit still propagate instead of being swallowed.
        pass

    for root, dirs, files in os.walk(dirpath, topdown=False):
        for f in files:
            unlink_or_rename_to_trash(join(root, f))
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
# we have our own TemporaryDirectory class because it's faster and handles disk issues better.
|
| 324 |
+
class TemporaryDirectory:
    """Create and return a temporary directory. This has the same
    behavior as mkdtemp but can be used as a context manager. For
    example:

    with TemporaryDirectory() as tmpdir:
        ...

    Upon exiting the context, the directory and everything contained
    in it are removed.
    """

    # Handle mkdtemp raising an exception
    name = None
    _closed = False

    def __init__(self, suffix="", prefix=".cph_tmp", dir=None):
        # BUG FIX: ``dir`` used to default to ``os.getcwd()`` evaluated once at
        # import time; resolve the current working directory per call instead.
        self.name = mkdtemp(suffix, prefix, dir if dir is not None else os.getcwd())

    def __repr__(self):
        return f"<{self.__class__.__name__} {self.name!r}>"

    def __enter__(self):
        return self.name

    def cleanup(self, _warn=False, _warnings=_warnings):
        """Remove the directory; warn (never raise) when removal fails."""
        if self.name and not self._closed:
            try:
                rm_rf(self.name)
            except Exception:  # narrowed from a bare ``except:``
                _warnings.warn(
                    'Conda-package-handling says: "I tried to clean up, '
                    "but I could not. There is a mess in %s that you might "
                    'want to clean up yourself. Sorry..."' % self.name
                )
            self._closed = True
            if _warn:
                # BUG FIX: ResourceWarning is a builtin, not an attribute of the
                # warnings module — the old ``_warnings.ResourceWarning`` raised
                # AttributeError whenever this implicit-cleanup path was hit.
                _warnings.warn(
                    f"Implicitly cleaning up {self!r}",
                    ResourceWarning,
                )

    def __exit__(self, exc, value, tb):
        self.cleanup()

    def __del__(self):
        # Issue a ResourceWarning if implicit cleanup needed
        self.cleanup(_warn=True)
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
@contextlib.contextmanager
def tmp_chdir(dest):
    """Context manager: chdir into *dest* for the duration of the block, then restore."""
    saved_cwd = os.getcwd()
    try:
        os.chdir(dest)
        yield
    finally:
        os.chdir(saved_cwd)
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
def ensure_list(arg):
    """Return *arg* unchanged if it is a non-string iterable; otherwise wrap it.

    ``None`` becomes ``[]``; strings and other scalars become one-element lists.
    """
    if hasattr(arg, "__iter__") and not isinstance(arg, str):
        return arg
    return [] if arg is None else [arg]
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def filter_files(
    files_list,
    prefix,
    filter_patterns=(
        r"(.*[\\\\/])?\.git[\\\\/].*",
        r"(.*[\\\\/])?\.git$",
        r"(.*)?\.DS_Store.*",
        r".*\.la$",
        r"conda-meta.*",
    ),
):
    """Remove things like the .git directory from the list of files to be copied"""
    remaining = set(files_list)
    for pattern in filter_patterns:
        matcher = re.compile(pattern)
        remaining -= set(filter(matcher.match, remaining))
    # `islink` prevents symlinks to directories from being removed
    return [
        f
        for f in remaining
        if os.path.islink(os.path.join(prefix, f)) or not os.path.isdir(os.path.join(prefix, f))
    ]
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
def filter_info_files(files_list, prefix):
    """Return only the members of *files_list* that are NOT conda ``info/`` metadata files."""
    info_patterns = (
        "info[\\\\/]index\\.json",
        "info[\\\\/]files",
        "info[\\\\/]paths\\.json",
        "info[\\\\/]about\\.json",
        "info[\\\\/]has_prefix",
        "info[\\\\/]hash_input_files",  # legacy, not used anymore
        "info[\\\\/]hash_input\\.json",
        "info[\\\\/]run_exports\\.yaml",  # legacy
        "info[\\\\/]run_exports\\.json",  # current
        "info[\\\\/]git",
        "info[\\\\/]recipe[\\\\/].*",
        "info[\\\\/]recipe_log.json",
        "info[\\\\/]recipe.tar",
        "info[\\\\/]test[\\\\/].*",
        "info[\\\\/]LICENSE.*",
        "info[\\\\/]requires",
        "info[\\\\/]meta",
        "info[\\\\/]platform",
        "info[\\\\/]no_link",
        "info[\\\\/]link\\.json",
        "info[\\\\/]icon\\.png",
    )
    return filter_files(files_list, prefix, filter_patterns=info_patterns)
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
def _checksum(fd, algorithm, buffersize=65536):
|
| 448 |
+
hash_impl = getattr(hashlib, algorithm)
|
| 449 |
+
if not hash_impl:
|
| 450 |
+
raise ValueError(f"Unrecognized hash algorithm: {algorithm}")
|
| 451 |
+
else:
|
| 452 |
+
hash_impl = hash_impl()
|
| 453 |
+
for block in iter(lambda: fd.read(buffersize), b""):
|
| 454 |
+
hash_impl.update(block)
|
| 455 |
+
return hash_impl.hexdigest()
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
def sha256_checksum(fd):
    """Return the SHA-256 hex digest of the open binary file *fd*."""
    return _checksum(fd, algorithm="sha256")
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def md5_checksum(fd):
    """Return the MD5 hex digest of the open binary file *fd*."""
    return _checksum(fd, algorithm="md5")
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
def checksum(fn, algorithm, buffersize=1 << 18):
    """
    Calculate a checksum for a filename (not an open file).
    """
    with open(fn, "rb") as stream:
        return _checksum(stream, algorithm, buffersize)
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
def checksums(fn, algorithms, buffersize=1 << 18):
    """
    Calculate multiple checksums for a filename in parallel.

    Returns the digests in the same order as *algorithms*.
    """
    # take care not to share hash_impl between threads
    with ThreadPoolExecutor(max_workers=len(algorithms)) as pool:
        futures = [pool.submit(checksum, fn, alg, buffersize) for alg in algorithms]
        return [future.result() for future in futures]
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
def anonymize_tarinfo(tarinfo):
    """
    Remove user id, name from tarinfo.

    Mutates and returns *tarinfo* (suitable as a ``TarFile.add`` filter).
    """
    # also remove timestamps?
    tarinfo.uid = tarinfo.gid = 0
    tarinfo.uname = tarinfo.gname = ""
    return tarinfo
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/validate.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import hashlib
|
| 4 |
+
import os
|
| 5 |
+
from itertools import chain
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
from conda_package_streaming import package_streaming
|
| 9 |
+
|
| 10 |
+
from .utils import TemporaryDirectory
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def validate_converted_files_match(
    src_file_or_folder, subject, reference_ext=""
): # pragma: nocover
    """Compare an extracted *subject* archive against a reference.

    Extracts both (the reference may already be an extracted folder) and
    reports, by relative path, files missing from *subject* and files whose
    sizes differ.  Returns ``(src_file_or_folder, missing_files,
    mismatch_size)``.  Symlinked destination paths are skipped.
    """
    # No longer used by conda-package-handling
    from .api import extract

    with TemporaryDirectory() as tmpdir:
        assert tmpdir is not None
        if os.path.isdir(src_file_or_folder):
            # reference is already an extracted folder
            src_folder = src_file_or_folder
        else:
            extract(src_file_or_folder + reference_ext, dest_dir=os.path.join(tmpdir, "src"))
            src_folder = os.path.join(tmpdir, "src")

        converted_folder = os.path.join(tmpdir, "converted")
        extract(subject, dest_dir=converted_folder)

        missing_files = set()
        mismatch_size = set()
        for root, dirs, files in os.walk(src_folder):
            for f in files:
                absfile = os.path.join(root, f)
                rp = os.path.relpath(absfile, src_folder)
                destpath = os.path.join(converted_folder, rp)
                # symlinks are not size-compared; only real files count
                if not os.path.islink(destpath):
                    if not os.path.isfile(destpath):
                        missing_files.add(rp)
                    elif os.stat(absfile).st_size != os.stat(destpath).st_size:
                        mismatch_size.add(rp)
        return src_file_or_folder, missing_files, mismatch_size
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def hash_fn():
    """Create and return a fresh BLAKE2b hash object (one per hashed file)."""
    hasher = hashlib.blake2b()
    return hasher
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Tar member metadata fields excluded from comparison when strict=False.
# ("chksum" is additionally always excluded — it is tar-internal.)
IGNORE_FIELDS = {
    "uid",
    "gid",
    "mtime",
    "uname",
    "gname",
    "chksum",
}  #: ignore if not strict
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def validate_converted_files_match_streaming(
    src: str | Path, reference: str | Path, *, strict=True
):
    """
    Check that two .tar.bz2 or .conda files (either of src_file and
    reference_file can be either format) match exactly, down to the timestamps
    etc.

    Does not check outside of the info- and pkg- components of a .conda.
    (conda's metadata.json, which gives the version "2" of the format)

    If strict = True, also check for matching uid, gid, mtime, uname, gname.

    Returns ``(src, missing, mismatched)``: names present in only one archive,
    and names whose metadata or content digest differ.
    """
    # "chksum" is tar-internal and never meaningful for this comparison; the
    # ownership/timestamp fields are only compared in strict mode.
    ignore_fields = {"chksum"} if strict else IGNORE_FIELDS

    def get_fileset(filename: str | Path):
        """Map member name -> filtered metadata (+ content digest) for one archive."""
        fileset = {}
        # A .conda carries separate info- and pkg- inner archives; a .tar.bz2
        # is streamed as a single "pkg" component.
        components = ["info", "pkg"] if os.fspath(filename).endswith(".conda") else ["pkg"]
        with open(filename, "rb") as conda_file:
            for component in components:
                for tar, member in package_streaming.stream_conda_component(
                    filename, conda_file, component
                ):
                    info = {
                        k: v for k, v in member.get_info().items() if k not in ignore_fields
                    }

                    if member.isfile():
                        hasher = hash_fn()
                        fd = tar.extractfile(member)
                        assert fd is not None
                        # Stream in 256 KiB blocks to bound memory use.
                        for block in iter(lambda: fd.read(1 << 18), b""):  # type: ignore
                            hasher.update(block)

                        info["digest"] = hasher.hexdigest()

                    fileset[info["name"]] = info

        return fileset

    source_set = get_fileset(src)
    reference_set = get_fileset(reference)

    missing = []
    mismatched = []

    if source_set != reference_set:
        # Iterate each member name exactly once. (chain() over both dicts
        # would visit a shared name twice and report it in `mismatched` twice.)
        for file in {**source_set, **reference_set}:
            if not (file in source_set and file in reference_set):
                missing.append(file)
            elif source_set[file] != reference_set[file]:
                mismatched.append(file)

    return src, missing, mismatched
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/distutils-precedence.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2638ce9e2500e572a5e0de7faed6661eb569d1b696fcba07b0dd223da5f5d224
|
| 3 |
+
size 151
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/future/__init__.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
future: Easy, safe support for Python 2/3 compatibility
|
| 3 |
+
=======================================================
|
| 4 |
+
|
| 5 |
+
``future`` is the missing compatibility layer between Python 2 and Python
|
| 6 |
+
3. It allows you to use a single, clean Python 3.x-compatible codebase to
|
| 7 |
+
support both Python 2 and Python 3 with minimal overhead.
|
| 8 |
+
|
| 9 |
+
It is designed to be used as follows::
|
| 10 |
+
|
| 11 |
+
from __future__ import (absolute_import, division,
|
| 12 |
+
print_function, unicode_literals)
|
| 13 |
+
from builtins import (
|
| 14 |
+
bytes, dict, int, list, object, range, str,
|
| 15 |
+
ascii, chr, hex, input, next, oct, open,
|
| 16 |
+
pow, round, super,
|
| 17 |
+
filter, map, zip)
|
| 18 |
+
|
| 19 |
+
followed by predominantly standard, idiomatic Python 3 code that then runs
|
| 20 |
+
similarly on Python 2.6/2.7 and Python 3.3+.
|
| 21 |
+
|
| 22 |
+
The imports have no effect on Python 3. On Python 2, they shadow the
|
| 23 |
+
corresponding builtins, which normally have different semantics on Python 3
|
| 24 |
+
versus 2, to provide their Python 3 semantics.
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
Standard library reorganization
|
| 28 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 29 |
+
|
| 30 |
+
``future`` supports the standard library reorganization (PEP 3108) through the
|
| 31 |
+
following Py3 interfaces:
|
| 32 |
+
|
| 33 |
+
>>> # Top-level packages with Py3 names provided on Py2:
|
| 34 |
+
>>> import html.parser
|
| 35 |
+
>>> import queue
|
| 36 |
+
>>> import tkinter.dialog
|
| 37 |
+
>>> import xmlrpc.client
|
| 38 |
+
>>> # etc.
|
| 39 |
+
|
| 40 |
+
>>> # Aliases provided for extensions to existing Py2 module names:
|
| 41 |
+
>>> from future.standard_library import install_aliases
|
| 42 |
+
>>> install_aliases()
|
| 43 |
+
|
| 44 |
+
>>> from collections import Counter, OrderedDict # backported to Py2.6
|
| 45 |
+
>>> from collections import UserDict, UserList, UserString
|
| 46 |
+
>>> import urllib.request
|
| 47 |
+
>>> from itertools import filterfalse, zip_longest
|
| 48 |
+
>>> from subprocess import getoutput, getstatusoutput
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
Automatic conversion
|
| 52 |
+
--------------------
|
| 53 |
+
|
| 54 |
+
An included script called `futurize
|
| 55 |
+
<http://python-future.org/automatic_conversion.html>`_ aids in converting
|
| 56 |
+
code (from either Python 2 or Python 3) to code compatible with both
|
| 57 |
+
platforms. It is similar to ``python-modernize`` but goes further in
|
| 58 |
+
providing Python 3 compatibility through the use of the backported types
|
| 59 |
+
and builtin functions in ``future``.
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
Documentation
|
| 63 |
+
-------------
|
| 64 |
+
|
| 65 |
+
See: http://python-future.org
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
Credits
|
| 69 |
+
-------
|
| 70 |
+
|
| 71 |
+
:Author: Ed Schofield, Jordan M. Adler, et al
|
| 72 |
+
:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
|
| 73 |
+
Ltd, Singapore. http://pythoncharmers.com
|
| 74 |
+
:Others: See docs/credits.rst or http://python-future.org/credits.html
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
Licensing
|
| 78 |
+
---------
|
| 79 |
+
Copyright 2013-2019 Python Charmers Pty Ltd, Australia.
|
| 80 |
+
The software is distributed under an MIT licence. See LICENSE.txt.
|
| 81 |
+
|
| 82 |
+
"""
|
| 83 |
+
|
| 84 |
+
# Package metadata for the "future" Python 2/3 compatibility layer.
__title__ = 'future'
__author__ = 'Ed Schofield'
__license__ = 'MIT'
__copyright__ = 'Copyright 2013-2019 Python Charmers Pty Ltd'
# Version components; combined into the full version string below.
__ver_major__ = 0
__ver_minor__ = 18
__ver_patch__ = 2
__ver_sub__ = ''  # optional suffix appended to the version string
__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
                              __ver_patch__, __ver_sub__)
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/METADATA
ADDED
|
@@ -0,0 +1,530 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.1
|
| 2 |
+
Name: psutil
|
| 3 |
+
Version: 5.9.8
|
| 4 |
+
Summary: Cross-platform lib for process and system monitoring in Python.
|
| 5 |
+
Home-page: https://github.com/giampaolo/psutil
|
| 6 |
+
Author: Giampaolo Rodola
|
| 7 |
+
Author-email: g.rodola@gmail.com
|
| 8 |
+
License: BSD-3-Clause
|
| 9 |
+
Keywords: ps,top,kill,free,lsof,netstat,nice,tty,ionice,uptime,taskmgr,process,df,iotop,iostat,ifconfig,taskset,who,pidof,pmap,smem,pstree,monitoring,ulimit,prlimit,performance,metrics,agent,observability
|
| 10 |
+
Platform: Platform Independent
|
| 11 |
+
Classifier: Development Status :: 5 - Production/Stable
|
| 12 |
+
Classifier: Environment :: Console
|
| 13 |
+
Classifier: Environment :: Win32 (MS Windows)
|
| 14 |
+
Classifier: Intended Audience :: Developers
|
| 15 |
+
Classifier: Intended Audience :: Information Technology
|
| 16 |
+
Classifier: Intended Audience :: System Administrators
|
| 17 |
+
Classifier: License :: OSI Approved :: BSD License
|
| 18 |
+
Classifier: Operating System :: MacOS :: MacOS X
|
| 19 |
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 10
|
| 20 |
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 7
|
| 21 |
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 8
|
| 22 |
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 8.1
|
| 23 |
+
Classifier: Operating System :: Microsoft :: Windows :: Windows Server 2003
|
| 24 |
+
Classifier: Operating System :: Microsoft :: Windows :: Windows Server 2008
|
| 25 |
+
Classifier: Operating System :: Microsoft :: Windows :: Windows Vista
|
| 26 |
+
Classifier: Operating System :: Microsoft
|
| 27 |
+
Classifier: Operating System :: OS Independent
|
| 28 |
+
Classifier: Operating System :: POSIX :: AIX
|
| 29 |
+
Classifier: Operating System :: POSIX :: BSD :: FreeBSD
|
| 30 |
+
Classifier: Operating System :: POSIX :: BSD :: NetBSD
|
| 31 |
+
Classifier: Operating System :: POSIX :: BSD :: OpenBSD
|
| 32 |
+
Classifier: Operating System :: POSIX :: BSD
|
| 33 |
+
Classifier: Operating System :: POSIX :: Linux
|
| 34 |
+
Classifier: Operating System :: POSIX :: SunOS/Solaris
|
| 35 |
+
Classifier: Operating System :: POSIX
|
| 36 |
+
Classifier: Programming Language :: C
|
| 37 |
+
Classifier: Programming Language :: Python :: 2
|
| 38 |
+
Classifier: Programming Language :: Python :: 2.7
|
| 39 |
+
Classifier: Programming Language :: Python :: 3
|
| 40 |
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
| 41 |
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
| 42 |
+
Classifier: Programming Language :: Python
|
| 43 |
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
| 44 |
+
Classifier: Topic :: Software Development :: Libraries
|
| 45 |
+
Classifier: Topic :: System :: Benchmark
|
| 46 |
+
Classifier: Topic :: System :: Hardware :: Hardware Drivers
|
| 47 |
+
Classifier: Topic :: System :: Hardware
|
| 48 |
+
Classifier: Topic :: System :: Monitoring
|
| 49 |
+
Classifier: Topic :: System :: Networking :: Monitoring :: Hardware Watchdog
|
| 50 |
+
Classifier: Topic :: System :: Networking :: Monitoring
|
| 51 |
+
Classifier: Topic :: System :: Networking
|
| 52 |
+
Classifier: Topic :: System :: Operating System
|
| 53 |
+
Classifier: Topic :: System :: Systems Administration
|
| 54 |
+
Classifier: Topic :: Utilities
|
| 55 |
+
Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*
|
| 56 |
+
Description-Content-Type: text/x-rst
|
| 57 |
+
License-File: LICENSE
|
| 58 |
+
Provides-Extra: test
|
| 59 |
+
Requires-Dist: ipaddress ; (python_version < "3.0") and extra == 'test'
|
| 60 |
+
Requires-Dist: mock ; (python_version < "3.0") and extra == 'test'
|
| 61 |
+
Requires-Dist: enum34 ; (python_version <= "3.4") and extra == 'test'
|
| 62 |
+
Requires-Dist: pywin32 ; (sys_platform == "win32") and extra == 'test'
|
| 63 |
+
Requires-Dist: wmi ; (sys_platform == "win32") and extra == 'test'
|
| 64 |
+
|
| 65 |
+
| |downloads| |stars| |forks| |contributors| |coverage|
|
| 66 |
+
| |version| |py-versions| |packages| |license|
|
| 67 |
+
| |github-actions-wheels| |github-actions-bsd| |appveyor| |doc| |twitter| |tidelift|
|
| 68 |
+
|
| 69 |
+
.. |downloads| image:: https://img.shields.io/pypi/dm/psutil.svg
|
| 70 |
+
:target: https://pepy.tech/project/psutil
|
| 71 |
+
:alt: Downloads
|
| 72 |
+
|
| 73 |
+
.. |stars| image:: https://img.shields.io/github/stars/giampaolo/psutil.svg
|
| 74 |
+
:target: https://github.com/giampaolo/psutil/stargazers
|
| 75 |
+
:alt: Github stars
|
| 76 |
+
|
| 77 |
+
.. |forks| image:: https://img.shields.io/github/forks/giampaolo/psutil.svg
|
| 78 |
+
:target: https://github.com/giampaolo/psutil/network/members
|
| 79 |
+
:alt: Github forks
|
| 80 |
+
|
| 81 |
+
.. |contributors| image:: https://img.shields.io/github/contributors/giampaolo/psutil.svg
|
| 82 |
+
:target: https://github.com/giampaolo/psutil/graphs/contributors
|
| 83 |
+
:alt: Contributors
|
| 84 |
+
|
| 85 |
+
.. |github-actions-wheels| image:: https://img.shields.io/github/actions/workflow/status/giampaolo/psutil/.github/workflows/build.yml?label=Linux%2C%20macOS%2C%20Windows
|
| 86 |
+
:target: https://github.com/giampaolo/psutil/actions?query=workflow%3Abuild
|
| 87 |
+
:alt: Linux, macOS, Windows
|
| 88 |
+
|
| 89 |
+
.. |github-actions-bsd| image:: https://img.shields.io/github/actions/workflow/status/giampaolo/psutil/.github/workflows/bsd.yml?label=FreeBSD,%20NetBSD,%20OpenBSD
|
| 90 |
+
:target: https://github.com/giampaolo/psutil/actions?query=workflow%3Absd-tests
|
| 91 |
+
:alt: FreeBSD, NetBSD, OpenBSD
|
| 92 |
+
|
| 93 |
+
.. |appveyor| image:: https://img.shields.io/appveyor/build/giampaolo/psutil/master.svg?maxAge=3600&label=Windows%20(py2)
|
| 94 |
+
:target: https://ci.appveyor.com/project/giampaolo/psutil
|
| 95 |
+
:alt: Windows (Appveyor)
|
| 96 |
+
|
| 97 |
+
.. |coverage| image:: https://coveralls.io/repos/github/giampaolo/psutil/badge.svg?branch=master
|
| 98 |
+
:target: https://coveralls.io/github/giampaolo/psutil?branch=master
|
| 99 |
+
:alt: Test coverage (coverall.io)
|
| 100 |
+
|
| 101 |
+
.. |doc| image:: https://readthedocs.org/projects/psutil/badge/?version=latest
|
| 102 |
+
:target: https://psutil.readthedocs.io/en/latest/
|
| 103 |
+
:alt: Documentation Status
|
| 104 |
+
|
| 105 |
+
.. |version| image:: https://img.shields.io/pypi/v/psutil.svg?label=pypi
|
| 106 |
+
:target: https://pypi.org/project/psutil
|
| 107 |
+
:alt: Latest version
|
| 108 |
+
|
| 109 |
+
.. |py-versions| image:: https://img.shields.io/pypi/pyversions/psutil.svg
|
| 110 |
+
:alt: Supported Python versions
|
| 111 |
+
|
| 112 |
+
.. |packages| image:: https://repology.org/badge/tiny-repos/python:psutil.svg
|
| 113 |
+
:target: https://repology.org/metapackage/python:psutil/versions
|
| 114 |
+
:alt: Binary packages
|
| 115 |
+
|
| 116 |
+
.. |license| image:: https://img.shields.io/pypi/l/psutil.svg
|
| 117 |
+
:target: https://github.com/giampaolo/psutil/blob/master/LICENSE
|
| 118 |
+
:alt: License
|
| 119 |
+
|
| 120 |
+
.. |twitter| image:: https://img.shields.io/twitter/follow/grodola.svg?label=follow&style=flat&logo=twitter&logoColor=4FADFF
|
| 121 |
+
:target: https://twitter.com/grodola
|
| 122 |
+
:alt: Twitter Follow
|
| 123 |
+
|
| 124 |
+
.. |tidelift| image:: https://tidelift.com/badges/github/giampaolo/psutil?style=flat
|
| 125 |
+
:target: https://tidelift.com/subscription/pkg/pypi-psutil?utm_source=pypi-psutil&utm_medium=referral&utm_campaign=readme
|
| 126 |
+
:alt: Tidelift
|
| 127 |
+
|
| 128 |
+
-----
|
| 129 |
+
|
| 130 |
+
Quick links
|
| 131 |
+
===========
|
| 132 |
+
|
| 133 |
+
- `Home page <https://github.com/giampaolo/psutil>`_
|
| 134 |
+
- `Install <https://github.com/giampaolo/psutil/blob/master/INSTALL.rst>`_
|
| 135 |
+
- `Documentation <http://psutil.readthedocs.io>`_
|
| 136 |
+
- `Download <https://pypi.org/project/psutil/#files>`_
|
| 137 |
+
- `Forum <http://groups.google.com/group/psutil/topics>`_
|
| 138 |
+
- `StackOverflow <https://stackoverflow.com/questions/tagged/psutil>`_
|
| 139 |
+
- `Blog <https://gmpy.dev/tags/psutil>`_
|
| 140 |
+
- `What's new <https://github.com/giampaolo/psutil/blob/master/HISTORY.rst>`_
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
Summary
|
| 144 |
+
=======
|
| 145 |
+
|
| 146 |
+
psutil (process and system utilities) is a cross-platform library for
|
| 147 |
+
retrieving information on **running processes** and **system utilization**
|
| 148 |
+
(CPU, memory, disks, network, sensors) in Python.
|
| 149 |
+
It is useful mainly for **system monitoring**, **profiling and limiting process
|
| 150 |
+
resources** and **management of running processes**.
|
| 151 |
+
It implements many functionalities offered by classic UNIX command line tools
|
| 152 |
+
such as *ps, top, iotop, lsof, netstat, ifconfig, free* and others.
|
| 153 |
+
psutil currently supports the following platforms:
|
| 154 |
+
|
| 155 |
+
- **Linux**
|
| 156 |
+
- **Windows**
|
| 157 |
+
- **macOS**
|
| 158 |
+
- **FreeBSD, OpenBSD**, **NetBSD**
|
| 159 |
+
- **Sun Solaris**
|
| 160 |
+
- **AIX**
|
| 161 |
+
|
| 162 |
+
Supported Python versions are **2.7**, **3.6+** and
|
| 163 |
+
`PyPy <http://pypy.org/>`__.
|
| 164 |
+
|
| 165 |
+
Funding
|
| 166 |
+
=======
|
| 167 |
+
|
| 168 |
+
While psutil is free software and will always be, the project would benefit
|
| 169 |
+
immensely from some funding.
|
| 170 |
+
Keeping up with bug reports and maintenance has become hardly sustainable for
|
| 171 |
+
me alone in terms of time.
|
| 172 |
+
If you're a company that's making significant use of psutil you can consider
|
| 173 |
+
becoming a sponsor via `GitHub Sponsors <https://github.com/sponsors/giampaolo>`__,
|
| 174 |
+
`Open Collective <https://opencollective.com/psutil>`__ or
|
| 175 |
+
`PayPal <https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=A9ZS7PKKRM3S8>`__
|
| 176 |
+
and have your logo displayed in here and psutil `doc <https://psutil.readthedocs.io>`__.
|
| 177 |
+
|
| 178 |
+
Sponsors
|
| 179 |
+
========
|
| 180 |
+
|
| 181 |
+
.. image:: https://github.com/giampaolo/psutil/raw/master/docs/_static/tidelift-logo.png
|
| 182 |
+
:width: 200
|
| 183 |
+
:alt: Alternative text
|
| 184 |
+
|
| 185 |
+
`Add your logo <https://github.com/sponsors/giampaolo>`__.
|
| 186 |
+
|
| 187 |
+
Example usages
|
| 188 |
+
==============
|
| 189 |
+
|
| 190 |
+
This represents pretty much the whole psutil API.
|
| 191 |
+
|
| 192 |
+
CPU
|
| 193 |
+
---
|
| 194 |
+
|
| 195 |
+
.. code-block:: python
|
| 196 |
+
|
| 197 |
+
>>> import psutil
|
| 198 |
+
>>>
|
| 199 |
+
>>> psutil.cpu_times()
|
| 200 |
+
scputimes(user=3961.46, nice=169.729, system=2150.659, idle=16900.540, iowait=629.59, irq=0.0, softirq=19.42, steal=0.0, guest=0, guest_nice=0.0)
|
| 201 |
+
>>>
|
| 202 |
+
>>> for x in range(3):
|
| 203 |
+
... psutil.cpu_percent(interval=1)
|
| 204 |
+
...
|
| 205 |
+
4.0
|
| 206 |
+
5.9
|
| 207 |
+
3.8
|
| 208 |
+
>>>
|
| 209 |
+
>>> for x in range(3):
|
| 210 |
+
... psutil.cpu_percent(interval=1, percpu=True)
|
| 211 |
+
...
|
| 212 |
+
[4.0, 6.9, 3.7, 9.2]
|
| 213 |
+
[7.0, 8.5, 2.4, 2.1]
|
| 214 |
+
[1.2, 9.0, 9.9, 7.2]
|
| 215 |
+
>>>
|
| 216 |
+
>>> for x in range(3):
|
| 217 |
+
... psutil.cpu_times_percent(interval=1, percpu=False)
|
| 218 |
+
...
|
| 219 |
+
scputimes(user=1.5, nice=0.0, system=0.5, idle=96.5, iowait=1.5, irq=0.0, softirq=0.0, steal=0.0, guest=0.0, guest_nice=0.0)
|
| 220 |
+
scputimes(user=1.0, nice=0.0, system=0.0, idle=99.0, iowait=0.0, irq=0.0, softirq=0.0, steal=0.0, guest=0.0, guest_nice=0.0)
|
| 221 |
+
scputimes(user=2.0, nice=0.0, system=0.0, idle=98.0, iowait=0.0, irq=0.0, softirq=0.0, steal=0.0, guest=0.0, guest_nice=0.0)
|
| 222 |
+
>>>
|
| 223 |
+
>>> psutil.cpu_count()
|
| 224 |
+
4
|
| 225 |
+
>>> psutil.cpu_count(logical=False)
|
| 226 |
+
2
|
| 227 |
+
>>>
|
| 228 |
+
>>> psutil.cpu_stats()
|
| 229 |
+
scpustats(ctx_switches=20455687, interrupts=6598984, soft_interrupts=2134212, syscalls=0)
|
| 230 |
+
>>>
|
| 231 |
+
>>> psutil.cpu_freq()
|
| 232 |
+
scpufreq(current=931.42925, min=800.0, max=3500.0)
|
| 233 |
+
>>>
|
| 234 |
+
>>> psutil.getloadavg() # also on Windows (emulated)
|
| 235 |
+
(3.14, 3.89, 4.67)
|
| 236 |
+
|
| 237 |
+
Memory
|
| 238 |
+
------
|
| 239 |
+
|
| 240 |
+
.. code-block:: python
|
| 241 |
+
|
| 242 |
+
>>> psutil.virtual_memory()
|
| 243 |
+
svmem(total=10367352832, available=6472179712, percent=37.6, used=8186245120, free=2181107712, active=4748992512, inactive=2758115328, buffers=790724608, cached=3500347392, shared=787554304)
|
| 244 |
+
>>> psutil.swap_memory()
|
| 245 |
+
sswap(total=2097147904, used=296128512, free=1801019392, percent=14.1, sin=304193536, sout=677842944)
|
| 246 |
+
>>>
|
| 247 |
+
|
| 248 |
+
Disks
|
| 249 |
+
-----
|
| 250 |
+
|
| 251 |
+
.. code-block:: python
|
| 252 |
+
|
| 253 |
+
>>> psutil.disk_partitions()
|
| 254 |
+
[sdiskpart(device='/dev/sda1', mountpoint='/', fstype='ext4', opts='rw,nosuid', maxfile=255, maxpath=4096),
|
| 255 |
+
sdiskpart(device='/dev/sda2', mountpoint='/home', fstype='ext', opts='rw', maxfile=255, maxpath=4096)]
|
| 256 |
+
>>>
|
| 257 |
+
>>> psutil.disk_usage('/')
|
| 258 |
+
sdiskusage(total=21378641920, used=4809781248, free=15482871808, percent=22.5)
|
| 259 |
+
>>>
|
| 260 |
+
>>> psutil.disk_io_counters(perdisk=False)
|
| 261 |
+
sdiskio(read_count=719566, write_count=1082197, read_bytes=18626220032, write_bytes=24081764352, read_time=5023392, write_time=63199568, read_merged_count=619166, write_merged_count=812396, busy_time=4523412)
|
| 262 |
+
>>>
|
| 263 |
+
|
| 264 |
+
Network
|
| 265 |
+
-------
|
| 266 |
+
|
| 267 |
+
.. code-block:: python
|
| 268 |
+
|
| 269 |
+
>>> psutil.net_io_counters(pernic=True)
|
| 270 |
+
{'eth0': netio(bytes_sent=485291293, bytes_recv=6004858642, packets_sent=3251564, packets_recv=4787798, errin=0, errout=0, dropin=0, dropout=0),
|
| 271 |
+
'lo': netio(bytes_sent=2838627, bytes_recv=2838627, packets_sent=30567, packets_recv=30567, errin=0, errout=0, dropin=0, dropout=0)}
|
| 272 |
+
>>>
|
| 273 |
+
>>> psutil.net_connections(kind='tcp')
|
| 274 |
+
[sconn(fd=115, family=<AddressFamily.AF_INET: 2>, type=<SocketType.SOCK_STREAM: 1>, laddr=addr(ip='10.0.0.1', port=48776), raddr=addr(ip='93.186.135.91', port=80), status='ESTABLISHED', pid=1254),
|
| 275 |
+
sconn(fd=117, family=<AddressFamily.AF_INET: 2>, type=<SocketType.SOCK_STREAM: 1>, laddr=addr(ip='10.0.0.1', port=43761), raddr=addr(ip='72.14.234.100', port=80), status='CLOSING', pid=2987),
|
| 276 |
+
...]
|
| 277 |
+
>>>
|
| 278 |
+
>>> psutil.net_if_addrs()
|
| 279 |
+
{'lo': [snicaddr(family=<AddressFamily.AF_INET: 2>, address='127.0.0.1', netmask='255.0.0.0', broadcast='127.0.0.1', ptp=None),
|
| 280 |
+
snicaddr(family=<AddressFamily.AF_INET6: 10>, address='::1', netmask='ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', broadcast=None, ptp=None),
|
| 281 |
+
snicaddr(family=<AddressFamily.AF_LINK: 17>, address='00:00:00:00:00:00', netmask=None, broadcast='00:00:00:00:00:00', ptp=None)],
|
| 282 |
+
'wlan0': [snicaddr(family=<AddressFamily.AF_INET: 2>, address='192.168.1.3', netmask='255.255.255.0', broadcast='192.168.1.255', ptp=None),
|
| 283 |
+
snicaddr(family=<AddressFamily.AF_INET6: 10>, address='fe80::c685:8ff:fe45:641%wlan0', netmask='ffff:ffff:ffff:ffff::', broadcast=None, ptp=None),
|
| 284 |
+
snicaddr(family=<AddressFamily.AF_LINK: 17>, address='c4:85:08:45:06:41', netmask=None, broadcast='ff:ff:ff:ff:ff:ff', ptp=None)]}
|
| 285 |
+
>>>
|
| 286 |
+
>>> psutil.net_if_stats()
|
| 287 |
+
{'lo': snicstats(isup=True, duplex=<NicDuplex.NIC_DUPLEX_UNKNOWN: 0>, speed=0, mtu=65536, flags='up,loopback,running'),
|
| 288 |
+
'wlan0': snicstats(isup=True, duplex=<NicDuplex.NIC_DUPLEX_FULL: 2>, speed=100, mtu=1500, flags='up,broadcast,running,multicast')}
|
| 289 |
+
>>>
|
| 290 |
+
|
| 291 |
+
Sensors
|
| 292 |
+
-------
|
| 293 |
+
|
| 294 |
+
.. code-block:: python
|
| 295 |
+
|
| 296 |
+
>>> import psutil
|
| 297 |
+
>>> psutil.sensors_temperatures()
|
| 298 |
+
{'acpitz': [shwtemp(label='', current=47.0, high=103.0, critical=103.0)],
|
| 299 |
+
'asus': [shwtemp(label='', current=47.0, high=None, critical=None)],
|
| 300 |
+
'coretemp': [shwtemp(label='Physical id 0', current=52.0, high=100.0, critical=100.0),
|
| 301 |
+
shwtemp(label='Core 0', current=45.0, high=100.0, critical=100.0)]}
|
| 302 |
+
>>>
|
| 303 |
+
>>> psutil.sensors_fans()
|
| 304 |
+
{'asus': [sfan(label='cpu_fan', current=3200)]}
|
| 305 |
+
>>>
|
| 306 |
+
>>> psutil.sensors_battery()
|
| 307 |
+
sbattery(percent=93, secsleft=16628, power_plugged=False)
|
| 308 |
+
>>>
|
| 309 |
+
|
| 310 |
+
Other system info
|
| 311 |
+
-----------------
|
| 312 |
+
|
| 313 |
+
.. code-block:: python
|
| 314 |
+
|
| 315 |
+
>>> import psutil
|
| 316 |
+
>>> psutil.users()
|
| 317 |
+
[suser(name='giampaolo', terminal='pts/2', host='localhost', started=1340737536.0, pid=1352),
|
| 318 |
+
suser(name='giampaolo', terminal='pts/3', host='localhost', started=1340737792.0, pid=1788)]
|
| 319 |
+
>>>
|
| 320 |
+
>>> psutil.boot_time()
|
| 321 |
+
1365519115.0
|
| 322 |
+
>>>
|
| 323 |
+
|
| 324 |
+
Process management
|
| 325 |
+
------------------
|
| 326 |
+
|
| 327 |
+
.. code-block:: python
|
| 328 |
+
|
| 329 |
+
>>> import psutil
|
| 330 |
+
>>> psutil.pids()
|
| 331 |
+
[1, 2, 3, 4, 5, 6, 7, 46, 48, 50, 51, 178, 182, 222, 223, 224, 268, 1215,
|
| 332 |
+
1216, 1220, 1221, 1243, 1244, 1301, 1601, 2237, 2355, 2637, 2774, 3932,
|
| 333 |
+
4176, 4177, 4185, 4187, 4189, 4225, 4243, 4245, 4263, 4282, 4306, 4311,
|
| 334 |
+
4312, 4313, 4314, 4337, 4339, 4357, 4358, 4363, 4383, 4395, 4408, 4433,
|
| 335 |
+
4443, 4445, 4446, 5167, 5234, 5235, 5252, 5318, 5424, 5644, 6987, 7054,
|
| 336 |
+
7055, 7071]
|
| 337 |
+
>>>
|
| 338 |
+
>>> p = psutil.Process(7055)
|
| 339 |
+
>>> p
|
| 340 |
+
psutil.Process(pid=7055, name='python3', status='running', started='09:04:44')
|
| 341 |
+
>>> p.pid
|
| 342 |
+
7055
|
| 343 |
+
>>> p.name()
|
| 344 |
+
'python3'
|
| 345 |
+
>>> p.exe()
|
| 346 |
+
'/usr/bin/python3'
|
| 347 |
+
>>> p.cwd()
|
| 348 |
+
'/home/giampaolo'
|
| 349 |
+
>>> p.cmdline()
|
| 350 |
+
['/usr/bin/python3', 'main.py']
|
| 351 |
+
>>>
|
| 352 |
+
>>> p.ppid()
|
| 353 |
+
7054
|
| 354 |
+
>>> p.parent()
|
| 355 |
+
psutil.Process(pid=4699, name='bash', status='sleeping', started='09:06:44')
|
| 356 |
+
>>> p.parents()
|
| 357 |
+
[psutil.Process(pid=4699, name='bash', started='09:06:44'),
|
| 358 |
+
psutil.Process(pid=4689, name='gnome-terminal-server', status='sleeping', started='0:06:44'),
|
| 359 |
+
psutil.Process(pid=1, name='systemd', status='sleeping', started='05:56:55')]
|
| 360 |
+
>>> p.children(recursive=True)
|
| 361 |
+
[psutil.Process(pid=29835, name='python3', status='sleeping', started='11:45:38'),
|
| 362 |
+
psutil.Process(pid=29836, name='python3', status='waking', started='11:43:39')]
|
| 363 |
+
>>>
|
| 364 |
+
>>> p.status()
|
| 365 |
+
'running'
|
| 366 |
+
>>> p.create_time()
|
| 367 |
+
1267551141.5019531
|
| 368 |
+
>>> p.terminal()
|
| 369 |
+
'/dev/pts/0'
|
| 370 |
+
>>>
|
| 371 |
+
>>> p.username()
|
| 372 |
+
'giampaolo'
|
| 373 |
+
>>> p.uids()
|
| 374 |
+
puids(real=1000, effective=1000, saved=1000)
|
| 375 |
+
>>> p.gids()
|
| 376 |
+
pgids(real=1000, effective=1000, saved=1000)
|
| 377 |
+
>>>
|
| 378 |
+
>>> p.cpu_times()
|
| 379 |
+
pcputimes(user=1.02, system=0.31, children_user=0.32, children_system=0.1, iowait=0.0)
|
| 380 |
+
>>> p.cpu_percent(interval=1.0)
|
| 381 |
+
12.1
|
| 382 |
+
>>> p.cpu_affinity()
|
| 383 |
+
[0, 1, 2, 3]
|
| 384 |
+
>>> p.cpu_affinity([0, 1]) # set
|
| 385 |
+
>>> p.cpu_num()
|
| 386 |
+
1
|
| 387 |
+
>>>
|
| 388 |
+
>>> p.memory_info()
|
| 389 |
+
pmem(rss=10915840, vms=67608576, shared=3313664, text=2310144, lib=0, data=7262208, dirty=0)
|
| 390 |
+
>>> p.memory_full_info() # "real" USS memory usage (Linux, macOS, Win only)
|
| 391 |
+
pfullmem(rss=10199040, vms=52133888, shared=3887104, text=2867200, lib=0, data=5967872, dirty=0, uss=6545408, pss=6872064, swap=0)
|
| 392 |
+
>>> p.memory_percent()
|
| 393 |
+
0.7823
|
| 394 |
+
>>> p.memory_maps()
|
| 395 |
+
    [pmmap_grouped(path='/lib/x86_64-linux-gnu/libutil-2.15.so', rss=32768, size=2125824, pss=32768, shared_clean=0, shared_dirty=0, private_clean=20480, private_dirty=12288, referenced=32768, anonymous=12288, swap=0),
|
| 396 |
+
     pmmap_grouped(path='/lib/x86_64-linux-gnu/libc-2.15.so', rss=3821568, size=3842048, pss=3821568, shared_clean=0, shared_dirty=0, private_clean=0, private_dirty=3821568, referenced=3575808, anonymous=3821568, swap=0),
|
| 397 |
+
pmmap_grouped(path='[heap]', rss=32768, size=139264, pss=32768, shared_clean=0, shared_dirty=0, private_clean=0, private_dirty=32768, referenced=32768, anonymous=32768, swap=0),
|
| 398 |
+
pmmap_grouped(path='[stack]', rss=2465792, size=2494464, pss=2465792, shared_clean=0, shared_dirty=0, private_clean=0, private_dirty=2465792, referenced=2277376, anonymous=2465792, swap=0),
|
| 399 |
+
...]
|
| 400 |
+
>>>
|
| 401 |
+
>>> p.io_counters()
|
| 402 |
+
pio(read_count=478001, write_count=59371, read_bytes=700416, write_bytes=69632, read_chars=456232, write_chars=517543)
|
| 403 |
+
>>>
|
| 404 |
+
>>> p.open_files()
|
| 405 |
+
[popenfile(path='/home/giampaolo/monit.py', fd=3, position=0, mode='r', flags=32768),
|
| 406 |
+
popenfile(path='/var/log/monit.log', fd=4, position=235542, mode='a', flags=33793)]
|
| 407 |
+
>>>
|
| 408 |
+
>>> p.connections(kind='tcp')
|
| 409 |
+
[pconn(fd=115, family=<AddressFamily.AF_INET: 2>, type=<SocketType.SOCK_STREAM: 1>, laddr=addr(ip='10.0.0.1', port=48776), raddr=addr(ip='93.186.135.91', port=80), status='ESTABLISHED'),
|
| 410 |
+
pconn(fd=117, family=<AddressFamily.AF_INET: 2>, type=<SocketType.SOCK_STREAM: 1>, laddr=addr(ip='10.0.0.1', port=43761), raddr=addr(ip='72.14.234.100', port=80), status='CLOSING')]
|
| 411 |
+
>>>
|
| 412 |
+
>>> p.threads()
|
| 413 |
+
[pthread(id=5234, user_time=22.5, system_time=9.2891),
|
| 414 |
+
pthread(id=5237, user_time=0.0707, system_time=1.1)]
|
| 415 |
+
>>>
|
| 416 |
+
>>> p.num_threads()
|
| 417 |
+
4
|
| 418 |
+
>>> p.num_fds()
|
| 419 |
+
8
|
| 420 |
+
>>> p.num_ctx_switches()
|
| 421 |
+
pctxsw(voluntary=78, involuntary=19)
|
| 422 |
+
>>>
|
| 423 |
+
>>> p.nice()
|
| 424 |
+
0
|
| 425 |
+
>>> p.nice(10) # set
|
| 426 |
+
>>>
|
| 427 |
+
>>> p.ionice(psutil.IOPRIO_CLASS_IDLE) # IO priority (Win and Linux only)
|
| 428 |
+
>>> p.ionice()
|
| 429 |
+
pionice(ioclass=<IOPriority.IOPRIO_CLASS_IDLE: 3>, value=0)
|
| 430 |
+
>>>
|
| 431 |
+
>>> p.rlimit(psutil.RLIMIT_NOFILE, (5, 5)) # set resource limits (Linux only)
|
| 432 |
+
>>> p.rlimit(psutil.RLIMIT_NOFILE)
|
| 433 |
+
(5, 5)
|
| 434 |
+
>>>
|
| 435 |
+
>>> p.environ()
|
| 436 |
+
{'LC_PAPER': 'it_IT.UTF-8', 'SHELL': '/bin/bash', 'GREP_OPTIONS': '--color=auto',
|
| 437 |
+
'XDG_CONFIG_DIRS': '/etc/xdg/xdg-ubuntu:/usr/share/upstart/xdg:/etc/xdg',
|
| 438 |
+
...}
|
| 439 |
+
>>>
|
| 440 |
+
>>> p.as_dict()
|
| 441 |
+
{'status': 'running', 'num_ctx_switches': pctxsw(voluntary=63, involuntary=1), 'pid': 5457, ...}
|
| 442 |
+
>>> p.is_running()
|
| 443 |
+
True
|
| 444 |
+
>>> p.suspend()
|
| 445 |
+
>>> p.resume()
|
| 446 |
+
>>>
|
| 447 |
+
>>> p.terminate()
|
| 448 |
+
>>> p.kill()
|
| 449 |
+
>>> p.wait(timeout=3)
|
| 450 |
+
<Exitcode.EX_OK: 0>
|
| 451 |
+
>>>
|
| 452 |
+
>>> psutil.test()
|
| 453 |
+
USER PID %CPU %MEM VSZ RSS TTY START TIME COMMAND
|
| 454 |
+
root 1 0.0 0.0 24584 2240 Jun17 00:00 init
|
| 455 |
+
root 2 0.0 0.0 0 0 Jun17 00:00 kthreadd
|
| 456 |
+
...
|
| 457 |
+
giampaolo 31475 0.0 0.0 20760 3024 /dev/pts/0 Jun19 00:00 python2.4
|
| 458 |
+
giampaolo 31721 0.0 2.2 773060 181896 00:04 10:30 chrome
|
| 459 |
+
root 31763 0.0 0.0 0 0 00:05 00:00 kworker/0:1
|
| 460 |
+
>>>
|
| 461 |
+
|
| 462 |
+
Further process APIs
|
| 463 |
+
--------------------
|
| 464 |
+
|
| 465 |
+
.. code-block:: python
|
| 466 |
+
|
| 467 |
+
>>> import psutil
|
| 468 |
+
>>> for proc in psutil.process_iter(['pid', 'name']):
|
| 469 |
+
... print(proc.info)
|
| 470 |
+
...
|
| 471 |
+
{'pid': 1, 'name': 'systemd'}
|
| 472 |
+
{'pid': 2, 'name': 'kthreadd'}
|
| 473 |
+
{'pid': 3, 'name': 'ksoftirqd/0'}
|
| 474 |
+
...
|
| 475 |
+
>>>
|
| 476 |
+
>>> psutil.pid_exists(3)
|
| 477 |
+
True
|
| 478 |
+
>>>
|
| 479 |
+
>>> def on_terminate(proc):
|
| 480 |
+
... print("process {} terminated".format(proc))
|
| 481 |
+
...
|
| 482 |
+
>>> # waits for multiple processes to terminate
|
| 483 |
+
>>> gone, alive = psutil.wait_procs(procs_list, timeout=3, callback=on_terminate)
|
| 484 |
+
>>>
|
| 485 |
+
|
| 486 |
+
Windows services
|
| 487 |
+
----------------
|
| 488 |
+
|
| 489 |
+
.. code-block:: python
|
| 490 |
+
|
| 491 |
+
>>> list(psutil.win_service_iter())
|
| 492 |
+
[<WindowsService(name='AeLookupSvc', display_name='Application Experience') at 38850096>,
|
| 493 |
+
<WindowsService(name='ALG', display_name='Application Layer Gateway Service') at 38850128>,
|
| 494 |
+
<WindowsService(name='APNMCP', display_name='Ask Update Service') at 38850160>,
|
| 495 |
+
<WindowsService(name='AppIDSvc', display_name='Application Identity') at 38850192>,
|
| 496 |
+
...]
|
| 497 |
+
>>> s = psutil.win_service_get('alg')
|
| 498 |
+
>>> s.as_dict()
|
| 499 |
+
{'binpath': 'C:\\Windows\\System32\\alg.exe',
|
| 500 |
+
'description': 'Provides support for 3rd party protocol plug-ins for Internet Connection Sharing',
|
| 501 |
+
'display_name': 'Application Layer Gateway Service',
|
| 502 |
+
'name': 'alg',
|
| 503 |
+
'pid': None,
|
| 504 |
+
'start_type': 'manual',
|
| 505 |
+
'status': 'stopped',
|
| 506 |
+
'username': 'NT AUTHORITY\\LocalService'}
|
| 507 |
+
|
| 508 |
+
Projects using psutil
|
| 509 |
+
=====================
|
| 510 |
+
|
| 511 |
+
Here are some I find particularly interesting:
|
| 512 |
+
|
| 513 |
+
- https://github.com/google/grr
|
| 514 |
+
- https://github.com/facebook/osquery/
|
| 515 |
+
- https://github.com/nicolargo/glances
|
| 516 |
+
- https://github.com/aristocratos/bpytop
|
| 517 |
+
- https://github.com/Jahaja/psdash
|
| 518 |
+
- https://github.com/ajenti/ajenti
|
| 519 |
+
- https://github.com/home-assistant/home-assistant/
|
| 520 |
+
|
| 521 |
+
Portings
|
| 522 |
+
========
|
| 523 |
+
|
| 524 |
+
- Go: https://github.com/shirou/gopsutil
|
| 525 |
+
- C: https://github.com/hamon-in/cpslib
|
| 526 |
+
- Rust: https://github.com/rust-psutil/rust-psutil
|
| 527 |
+
- Nim: https://github.com/johnscillieri/psutil-nim
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/RECORD
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
psutil-5.9.8.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
psutil-5.9.8.dist-info/LICENSE,sha256=uJwGOzeG4o4MCjjxkx22H-015p3SopZvvs_-4PRsjRA,1548
|
| 3 |
+
psutil-5.9.8.dist-info/METADATA,sha256=GfZ5-fidrq0yXOCgyN4funClVHk6R_zfJz-3FKx0KjU,21837
|
| 4 |
+
psutil-5.9.8.dist-info/RECORD,,
|
| 5 |
+
psutil-5.9.8.dist-info/WHEEL,sha256=rgpVBmjjvbINeGKCkWEGd3f40VHMTsDkQj1Lgil82zE,221
|
| 6 |
+
psutil-5.9.8.dist-info/top_level.txt,sha256=gCNhn57wzksDjSAISmgMJ0aiXzQulk0GJhb2-BAyYgw,7
|
| 7 |
+
psutil/__init__.py,sha256=YWf_i2ZjuJqELRfNl6nX0nZuoi09GXqMZPJVsMJMCQQ,89169
|
| 8 |
+
psutil/__pycache__/__init__.cpython-38.pyc,,
|
| 9 |
+
psutil/__pycache__/_common.cpython-38.pyc,,
|
| 10 |
+
psutil/__pycache__/_compat.cpython-38.pyc,,
|
| 11 |
+
psutil/__pycache__/_psaix.cpython-38.pyc,,
|
| 12 |
+
psutil/__pycache__/_psbsd.cpython-38.pyc,,
|
| 13 |
+
psutil/__pycache__/_pslinux.cpython-38.pyc,,
|
| 14 |
+
psutil/__pycache__/_psosx.cpython-38.pyc,,
|
| 15 |
+
psutil/__pycache__/_psposix.cpython-38.pyc,,
|
| 16 |
+
psutil/__pycache__/_pssunos.cpython-38.pyc,,
|
| 17 |
+
psutil/__pycache__/_pswindows.cpython-38.pyc,,
|
| 18 |
+
psutil/_common.py,sha256=BTwHxdYJQynrn5i8IOs6XFxLo9L1Eg5cgDCq6Yaypr0,29393
|
| 19 |
+
psutil/_compat.py,sha256=AOF0vSCWle_sbJ1Gw-CGx0aEI9yk5u70YhPYOPg3KHs,15349
|
| 20 |
+
psutil/_psaix.py,sha256=1bqEwjk6IG3Y-zrDajKi8oPSYvq3NrqpPUQeDRo4Ugg,18749
|
| 21 |
+
psutil/_psbsd.py,sha256=FruAJy_GrpjAfHLpb4c3IVGfy2Xii8b1BHnIjxUfbbI,31956
|
| 22 |
+
psutil/_pslinux.py,sha256=QYI6yHTvRMYZlVxB068xVAEMEAGGRSU9E7-sJD3165o,88043
|
| 23 |
+
psutil/_psosx.py,sha256=d_KMSzmjL6vAYQx1fQN57b3xazJGdTt4rIysODB1r2g,16209
|
| 24 |
+
psutil/_psposix.py,sha256=X9rd7WHKQ6mUAn2ihb03MCnzrBtQsrPRkCouExmuagQ,8235
|
| 25 |
+
psutil/_pssunos.py,sha256=Zx6eLY-0NRUFFIKP7SycktgDoottdnlA9aX8y4e74dY,25559
|
| 26 |
+
psutil/_psutil_linux.abi3.so,sha256=onwm8BWn6axbKjPqB263JHm6rbDcEvMncKYhhpbu5I4,115304
|
| 27 |
+
psutil/_psutil_posix.abi3.so,sha256=xNVKy1LdKcnigEe_BpaXfYEg4qad9MkedLwkYh4BaBk,71624
|
| 28 |
+
psutil/_pswindows.py,sha256=BXgoASpIS6ccw5jTx4V-H2fMsIcSv_NQ6EozsxFgY-0,37734
|
| 29 |
+
psutil/tests/__init__.py,sha256=gc621Vvgj2NaxusB_zGwfqwP_np3qaCQqJlyPrp1D0k,64753
|
| 30 |
+
psutil/tests/__main__.py,sha256=f1YY6SZebctd5Hwb1in40nFShfJw4zA2FLidsdg_eY8,269
|
| 31 |
+
psutil/tests/__pycache__/__init__.cpython-38.pyc,,
|
| 32 |
+
psutil/tests/__pycache__/__main__.cpython-38.pyc,,
|
| 33 |
+
psutil/tests/__pycache__/runner.cpython-38.pyc,,
|
| 34 |
+
psutil/tests/__pycache__/test_aix.cpython-38.pyc,,
|
| 35 |
+
psutil/tests/__pycache__/test_bsd.cpython-38.pyc,,
|
| 36 |
+
psutil/tests/__pycache__/test_connections.cpython-38.pyc,,
|
| 37 |
+
psutil/tests/__pycache__/test_contracts.cpython-38.pyc,,
|
| 38 |
+
psutil/tests/__pycache__/test_linux.cpython-38.pyc,,
|
| 39 |
+
psutil/tests/__pycache__/test_memleaks.cpython-38.pyc,,
|
| 40 |
+
psutil/tests/__pycache__/test_misc.cpython-38.pyc,,
|
| 41 |
+
psutil/tests/__pycache__/test_osx.cpython-38.pyc,,
|
| 42 |
+
psutil/tests/__pycache__/test_posix.cpython-38.pyc,,
|
| 43 |
+
psutil/tests/__pycache__/test_process.cpython-38.pyc,,
|
| 44 |
+
psutil/tests/__pycache__/test_process_all.cpython-38.pyc,,
|
| 45 |
+
psutil/tests/__pycache__/test_sunos.cpython-38.pyc,,
|
| 46 |
+
psutil/tests/__pycache__/test_system.cpython-38.pyc,,
|
| 47 |
+
psutil/tests/__pycache__/test_testutils.cpython-38.pyc,,
|
| 48 |
+
psutil/tests/__pycache__/test_unicode.cpython-38.pyc,,
|
| 49 |
+
psutil/tests/__pycache__/test_windows.cpython-38.pyc,,
|
| 50 |
+
psutil/tests/runner.py,sha256=WtRnLZ5gS39gIysLCkeV99hw5LvodVwBGesoen9IMNs,11464
|
| 51 |
+
psutil/tests/test_aix.py,sha256=8SKjFw7cR3byBShlvWAzQSOTjji5Bpnk8JyUksR0AQI,4585
|
| 52 |
+
psutil/tests/test_bsd.py,sha256=kfNXLsZ1p-VoGtVX4At9qMOS_zN8OMVP9yecuHEWaC4,21245
|
| 53 |
+
psutil/tests/test_connections.py,sha256=CHL65q2IYxb8ErtJAUh87RyKSCWewwmAg4VFK80arO8,21642
|
| 54 |
+
psutil/tests/test_contracts.py,sha256=_TAWN7ldbgqJIdh6tMkRDivXPoH6T3jKYG47_KzDvtE,12998
|
| 55 |
+
psutil/tests/test_linux.py,sha256=LPvhEPUPSQ4MdxBlARPcR4Thsd_wNqKkqj0JqlKkZCo,92530
|
| 56 |
+
psutil/tests/test_memleaks.py,sha256=pzwEMUaz6Xh8AmN_qiCmYTU2yzot_dwsdbTfFdK89Vk,15012
|
| 57 |
+
psutil/tests/test_misc.py,sha256=JovYMJNShu2yC3t2sfkFY5uW96TL7K5gkwXnjOR25aQ,35117
|
| 58 |
+
psutil/tests/test_osx.py,sha256=MQsepO25TlfydaOxjqgEBM4Wri9SnCUkpBLEiQnKv2Q,6603
|
| 59 |
+
psutil/tests/test_posix.py,sha256=sDam9vdJJWuYNddovhxi1c3K-z4AfTAD0fQyyaeD8YY,17387
|
| 60 |
+
psutil/tests/test_process.py,sha256=lOq3nOyQQvKRJRaTREO0GB0ErOQGZeqxHsEbn5qs8P4,61535
|
| 61 |
+
psutil/tests/test_process_all.py,sha256=qkLXnkVVYEqM2IhhID0CzEVWAmD4Ib7i0E4mUjo8J_Y,16112
|
| 62 |
+
psutil/tests/test_sunos.py,sha256=NhZsHABJKjCEDzGA7ZL62s6NlqpJwpoDybaPB2Bm9HE,1310
|
| 63 |
+
psutil/tests/test_system.py,sha256=rfzGP_ZuMHxzuTz3CuVxn8-zXNe5_9YCm1Z5JTKXqT8,37044
|
| 64 |
+
psutil/tests/test_testutils.py,sha256=mBbsRBbUf4VmDrKTbvlSeVqo_5HTM9h4eTE_VZ_5eXc,14828
|
| 65 |
+
psutil/tests/test_unicode.py,sha256=lKytzxNA72Zdhhz6jnXjVRm-3-79j1zeliPO1SMSVaE,12549
|
| 66 |
+
psutil/tests/test_windows.py,sha256=D_fKdhp8rsXDF7OrBXLRPS7XMpqXAzqJzbrjPqlL5pE,35298
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
psutil
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/LICENSE
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ISC License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2014 Kenneth Reitz.
|
| 4 |
+
|
| 5 |
+
Permission to use, copy, modify, and/or distribute this software for any
|
| 6 |
+
purpose with or without fee is hereby granted, provided that the above
|
| 7 |
+
copyright notice and this permission notice appear in all copies.
|
| 8 |
+
|
| 9 |
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
| 10 |
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
| 11 |
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
| 12 |
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
| 13 |
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
| 14 |
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
| 15 |
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/METADATA
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.1
|
| 2 |
+
Name: requests-oauthlib
|
| 3 |
+
Version: 1.3.1
|
| 4 |
+
Summary: OAuthlib authentication support for Requests.
|
| 5 |
+
Home-page: https://github.com/requests/requests-oauthlib
|
| 6 |
+
Author: Kenneth Reitz
|
| 7 |
+
Author-email: me@kennethreitz.com
|
| 8 |
+
License: ISC
|
| 9 |
+
Platform: UNKNOWN
|
| 10 |
+
Classifier: Development Status :: 5 - Production/Stable
|
| 11 |
+
Classifier: Intended Audience :: Developers
|
| 12 |
+
Classifier: Natural Language :: English
|
| 13 |
+
Classifier: License :: OSI Approved :: BSD License
|
| 14 |
+
Classifier: Programming Language :: Python
|
| 15 |
+
Classifier: Programming Language :: Python :: 2
|
| 16 |
+
Classifier: Programming Language :: Python :: 2.7
|
| 17 |
+
Classifier: Programming Language :: Python :: 3
|
| 18 |
+
Classifier: Programming Language :: Python :: 3.4
|
| 19 |
+
Classifier: Programming Language :: Python :: 3.5
|
| 20 |
+
Classifier: Programming Language :: Python :: 3.6
|
| 21 |
+
Classifier: Programming Language :: Python :: 3.7
|
| 22 |
+
Classifier: Programming Language :: Python :: 3.8
|
| 23 |
+
Classifier: Programming Language :: Python :: 3.9
|
| 24 |
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
| 25 |
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
| 26 |
+
Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
|
| 27 |
+
Description-Content-Type: text/x-rst
|
| 28 |
+
License-File: LICENSE
|
| 29 |
+
Requires-Dist: oauthlib (>=3.0.0)
|
| 30 |
+
Requires-Dist: requests (>=2.0.0)
|
| 31 |
+
Provides-Extra: rsa
|
| 32 |
+
Requires-Dist: oauthlib[signedtoken] (>=3.0.0) ; extra == 'rsa'
|
| 33 |
+
|
| 34 |
+
Requests-OAuthlib |build-status| |coverage-status| |docs|
|
| 35 |
+
=========================================================
|
| 36 |
+
|
| 37 |
+
This project provides first-class OAuth library support for `Requests <http://python-requests.org>`_.
|
| 38 |
+
|
| 39 |
+
The OAuth 1 workflow
|
| 40 |
+
--------------------
|
| 41 |
+
|
| 42 |
+
OAuth 1 can seem overly complicated and it sure has its quirks. Luckily,
|
| 43 |
+
requests_oauthlib hides most of these and let you focus at the task at hand.
|
| 44 |
+
|
| 45 |
+
Accessing protected resources using requests_oauthlib is as simple as:
|
| 46 |
+
|
| 47 |
+
.. code-block:: pycon
|
| 48 |
+
|
| 49 |
+
>>> from requests_oauthlib import OAuth1Session
|
| 50 |
+
>>> twitter = OAuth1Session('client_key',
|
| 51 |
+
client_secret='client_secret',
|
| 52 |
+
resource_owner_key='resource_owner_key',
|
| 53 |
+
resource_owner_secret='resource_owner_secret')
|
| 54 |
+
>>> url = 'https://api.twitter.com/1/account/settings.json'
|
| 55 |
+
>>> r = twitter.get(url)
|
| 56 |
+
|
| 57 |
+
Before accessing resources you will need to obtain a few credentials from your
|
| 58 |
+
provider (e.g. Twitter) and authorization from the user for whom you wish to
|
| 59 |
+
retrieve resources for. You can read all about this in the full
|
| 60 |
+
`OAuth 1 workflow guide on RTD <https://requests-oauthlib.readthedocs.io/en/latest/oauth1_workflow.html>`_.
|
| 61 |
+
|
| 62 |
+
The OAuth 2 workflow
|
| 63 |
+
--------------------
|
| 64 |
+
|
| 65 |
+
OAuth 2 is generally simpler than OAuth 1 but comes in more flavours. The most
|
| 66 |
+
common being the Authorization Code Grant, also known as the WebApplication
|
| 67 |
+
flow.
|
| 68 |
+
|
| 69 |
+
Fetching a protected resource after obtaining an access token can be extremely
|
| 70 |
+
simple. However, before accessing resources you will need to obtain a few
|
| 71 |
+
credentials from your provider (e.g. Google) and authorization from the user
|
| 72 |
+
for whom you wish to retrieve resources for. You can read all about this in the
|
| 73 |
+
full `OAuth 2 workflow guide on RTD <https://requests-oauthlib.readthedocs.io/en/latest/oauth2_workflow.html>`_.
|
| 74 |
+
|
| 75 |
+
Installation
|
| 76 |
+
-------------
|
| 77 |
+
|
| 78 |
+
To install requests and requests_oauthlib you can use pip:
|
| 79 |
+
|
| 80 |
+
.. code-block:: bash
|
| 81 |
+
|
| 82 |
+
$ pip install requests requests_oauthlib
|
| 83 |
+
|
| 84 |
+
.. |build-status| image:: https://github.com/requests/requests-oauthlib/actions/workflows/run-tests.yml/badge.svg
|
| 85 |
+
:target: https://github.com/requests/requests-oauthlib/actions
|
| 86 |
+
.. |coverage-status| image:: https://img.shields.io/coveralls/requests/requests-oauthlib.svg
|
| 87 |
+
:target: https://coveralls.io/r/requests/requests-oauthlib
|
| 88 |
+
.. |docs| image:: https://readthedocs.org/projects/requests-oauthlib/badge/
|
| 89 |
+
:alt: Documentation Status
|
| 90 |
+
:scale: 100%
|
| 91 |
+
:target: https://requests-oauthlib.readthedocs.io/
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
History
|
| 95 |
+
-------
|
| 96 |
+
|
| 97 |
+
v1.3.1 (21 January 2022)
|
| 98 |
+
++++++++++++++++++++++++
|
| 99 |
+
|
| 100 |
+
- Add initial support for OAuth Mutual TLS (draft-ietf-oauth-mtls)
|
| 101 |
+
- Add eBay compliance fix
|
| 102 |
+
- Add Spotify OAuth 2 Tutorial
|
| 103 |
+
- Add support for python 3.8, 3.9
|
| 104 |
+
- Fixed LinkedIn Compliance Fixes
|
| 105 |
+
- Fixed ReadTheDocs Documentation and sphinx errors
|
| 106 |
+
- Moved pipeline to GitHub Actions
|
| 107 |
+
|
| 108 |
+
v1.3.0 (6 November 2019)
|
| 109 |
+
++++++++++++++++++++++++
|
| 110 |
+
|
| 111 |
+
- Instagram compliance fix
|
| 112 |
+
- Added ``force_querystring`` argument to fetch_token() method on OAuth2Session
|
| 113 |
+
|
| 114 |
+
v1.2.0 (14 January 2019)
|
| 115 |
+
++++++++++++++++++++++++
|
| 116 |
+
|
| 117 |
+
- This project now depends on OAuthlib 3.0.0 and above. It does **not** support
|
| 118 |
+
versions of OAuthlib before 3.0.0.
|
| 119 |
+
- Updated oauth2 tests to use 'sess' for an OAuth2Session instance instead of `auth`
|
| 120 |
+
  because OAuth2Session objects and methods accept an `auth` parameter which is
|
| 121 |
+
typically an instance of `requests.auth.HTTPBasicAuth`
|
| 122 |
+
- `OAuth2Session.fetch_token` previously tried to guess how and where to provide
|
| 123 |
+
"client" and "user" credentials incorrectly. This was incompatible with some
|
| 124 |
+
OAuth servers and incompatible with breaking changes in oauthlib that seek to
|
| 125 |
+
correctly provide the `client_id`. The older implementation also did not raise
|
| 126 |
+
the correct exceptions when username and password are not present on Legacy
|
| 127 |
+
clients.
|
| 128 |
+
- Avoid automatic netrc authentication for OAuth2Session.
|
| 129 |
+
|
| 130 |
+
v1.1.0 (9 January 2019)
|
| 131 |
+
+++++++++++++++++++++++
|
| 132 |
+
|
| 133 |
+
- Adjusted version specifier for ``oauthlib`` dependency: this project is
|
| 134 |
+
not yet compatible with ``oauthlib`` 3.0.0.
|
| 135 |
+
- Dropped dependency on ``nose``.
|
| 136 |
+
- Minor changes to clean up the code and make it more readable/maintainable.
|
| 137 |
+
|
| 138 |
+
v1.0.0 (4 June 2018)
|
| 139 |
+
++++++++++++++++++++
|
| 140 |
+
|
| 141 |
+
- **Removed support for Python 2.6 and Python 3.3.**
|
| 142 |
+
This project now supports Python 2.7, and Python 3.4 and above.
|
| 143 |
+
- Added several examples to the documentation.
|
| 144 |
+
- Added plentymarkets compliance fix.
|
| 145 |
+
- Added a ``token`` property to OAuth1Session, to match the corresponding
|
| 146 |
+
``token`` property on OAuth2Session.
|
| 147 |
+
|
| 148 |
+
v0.8.0 (14 February 2017)
|
| 149 |
+
+++++++++++++++++++++++++
|
| 150 |
+
|
| 151 |
+
- Added Fitbit compliance fix.
|
| 152 |
+
- Fixed an issue where newlines in the response body for the access token
|
| 153 |
+
request would cause errors when trying to extract the token.
|
| 154 |
+
- Fixed an issue introduced in v0.7.0 where users passing ``auth`` to several
|
| 155 |
+
methods would encounter conflicts with the ``client_id`` and
|
| 156 |
+
``client_secret``-derived auth. The user-supplied ``auth`` argument is now
|
| 157 |
+
used in preference to those options.
|
| 158 |
+
|
| 159 |
+
v0.7.0 (22 September 2016)
|
| 160 |
+
++++++++++++++++++++++++++
|
| 161 |
+
|
| 162 |
+
- Allowed ``OAuth2Session.request`` to take the ``client_id`` and
|
| 163 |
+
``client_secret`` parameters for the purposes of automatic token refresh,
|
| 164 |
+
which may need them.
|
| 165 |
+
|
| 166 |
+
v0.6.2 (12 July 2016)
|
| 167 |
+
+++++++++++++++++++++
|
| 168 |
+
|
| 169 |
+
- Use ``client_id`` and ``client_secret`` for the Authorization header if
|
| 170 |
+
provided.
|
| 171 |
+
- Allow explicit bypass of the Authorization header by setting ``auth=False``.
|
| 172 |
+
- Pass through the ``proxies`` kwarg when refreshing tokens.
|
| 173 |
+
- Miscellaneous cleanups.
|
| 174 |
+
|
| 175 |
+
v0.6.1 (19 February 2016)
|
| 176 |
+
+++++++++++++++++++++++++
|
| 177 |
+
|
| 178 |
+
- Fixed a bug when sending authorization in headers with no username and
|
| 179 |
+
password present.
|
| 180 |
+
- Make sure we clear the session token before obtaining a new one.
|
| 181 |
+
- Some improvements to the Slack compliance fix.
|
| 182 |
+
- Avoid timing problems around token refresh.
|
| 183 |
+
- Allow passing arbitrary arguments to requests when calling
|
| 184 |
+
``fetch_request_token`` and ``fetch_access_token``.
|
| 185 |
+
|
| 186 |
+
v0.6.0 (14 December 2015)
|
| 187 |
+
+++++++++++++++++++++++++
|
| 188 |
+
|
| 189 |
+
- Add compliance fix for Slack.
|
| 190 |
+
- Add compliance fix for Mailchimp.
|
| 191 |
+
- ``TokenRequestDenied`` exceptions now carry the entire response, not just the
|
| 192 |
+
status code.
|
| 193 |
+
- Pass through keyword arguments when refreshing tokens automatically.
|
| 194 |
+
- Send authorization in headers, not just body, to maximize compatibility.
|
| 195 |
+
- More getters/setters available for OAuth2 session client values.
|
| 196 |
+
- Allow sending custom headers when refreshing tokens, and set some defaults.
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
v0.5.0 (4 May 2015)
|
| 200 |
+
+++++++++++++++++++
|
| 201 |
+
- Fix ``TypeError`` being raised instead of ``TokenMissing`` error.
|
| 202 |
+
- Raise requests exceptions on 4XX and 5XX responses in the OAuth2 flow.
|
| 203 |
+
- Avoid ``AttributeError`` when initializing the ``OAuth2Session`` class
|
| 204 |
+
without complete client information.
|
| 205 |
+
|
| 206 |
+
v0.4.2 (16 October 2014)
|
| 207 |
+
++++++++++++++++++++++++
|
| 208 |
+
- New ``authorized`` property on OAuth1Session and OAuth2Session, which allows
|
| 209 |
+
you to easily determine if the session is already authorized with OAuth tokens
|
| 210 |
+
or not.
|
| 211 |
+
- New ``TokenMissing`` and ``VerifierMissing`` exception classes for OAuth1Session:
|
| 212 |
+
this will make it easier to catch and identify these exceptions.
|
| 213 |
+
|
| 214 |
+
v0.4.1 (6 June 2014)
|
| 215 |
+
++++++++++++++++++++
|
| 216 |
+
- New install target ``[rsa]`` for people using OAuth1 RSA-SHA1 signature
|
| 217 |
+
method.
|
| 218 |
+
- Fixed bug in OAuth2 where supplied state param was not used in auth url.
|
| 219 |
+
- OAuth2 HTTPS checking can be disabled by setting environment variable
|
| 220 |
+
``OAUTHLIB_INSECURE_TRANSPORT``.
|
| 221 |
+
- OAuth1 now re-authorizes upon redirects.
|
| 222 |
+
- OAuth1 token fetching now raises a detailed error message when the
|
| 223 |
+
response body is incorrectly encoded or the request was denied.
|
| 224 |
+
- Added support for custom OAuth1 clients.
|
| 225 |
+
- OAuth2 compliance fix for Sina Weibo.
|
| 226 |
+
- Multiple fixes to facebook compliance fix.
|
| 227 |
+
- Compliance fixes now re-encode body properly as bytes in Python 3.
|
| 228 |
+
- Logging now properly done under ``requests_oauthlib`` namespace instead
|
| 229 |
+
of piggybacking on oauthlib namespace.
|
| 230 |
+
- Logging introduced for OAuth1 auth and session.
|
| 231 |
+
|
| 232 |
+
v0.4.0 (29 September 2013)
|
| 233 |
+
++++++++++++++++++++++++++
|
| 234 |
+
- OAuth1Session methods only return unicode strings. #55.
|
| 235 |
+
- Renamed requests_oauthlib.core to requests_oauthlib.oauth1_auth for consistency. #79.
|
| 236 |
+
- Added Facebook compliance fix and access_token_response hook to OAuth2Session. #63.
|
| 237 |
+
- Added LinkedIn compliance fix.
|
| 238 |
+
- Added refresh_token_response compliance hook, invoked before parsing the refresh token.
|
| 239 |
+
- Correctly limit compliance hooks to running only once!
|
| 240 |
+
- Content type guessing should only be done when no content type is given
|
| 241 |
+
- OAuth1 now updates r.headers instead of replacing it with non case insensitive dict
|
| 242 |
+
- Remove last use of Response.content (in OAuth1Session). #44.
|
| 243 |
+
- State param can now be supplied in OAuth2Session.authorize_url
|
| 244 |
+
|
| 245 |
+
|