diff --git a/.gitattributes b/.gitattributes index e21af445aa68547683013202264074eb32608340..6137d1b7014e589f0da5cf3f2312c42390e707c5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -240,3 +240,6 @@ my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_struct.cpyth my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_json.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text my_container_sandbox/workspace/anaconda3/lib/libnppig.so.11.3.3.95 filter=lfs diff=lfs merge=lfs -text my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_pickle.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_curses.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_elementtree.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_codecs_kr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_codecs_kr.cpython-38-x86_64-linux-gnu.so b/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_codecs_kr.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..42b66149973f0bf3b787a970043d8ca30e3a83ee --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_codecs_kr.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd191d7e8e9abe1ef92f04ea5546ee64ced1fcdf6aa7a4beaa6f7ae3126358d +size 177608 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_curses.cpython-38-x86_64-linux-gnu.so b/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_curses.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..cde337d1c0f73501c9e92d3868f6129e403f0b2d --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_curses.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f172437ab7f53fb597873b15ed5c41e4a63a346968a7b3b3b1ab71832c1dd2c +size 464208 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_elementtree.cpython-38-x86_64-linux-gnu.so b/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_elementtree.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..cde067fa7dacdda794ef997fc1c401d4c4488a31 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/lib-dynload/_elementtree.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:218bd174fff49763ef22945a55a34b57370283ab40ab6fca237345f183ea256a +size 347560 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libXau-00ec42fe.so.6.0.0 b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libXau-00ec42fe.so.6.0.0 new file mode 100644 index 0000000000000000000000000000000000000000..936dbcdd7f2ccd022346baebcfdd10c55542b43c Binary files /dev/null and b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libXau-00ec42fe.so.6.0.0 differ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/__init__.py 
b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b2890623dc2725b6e69551f0c6055de2d39495c --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/__init__.py @@ -0,0 +1,21 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module while preserving other warnings, so don't check this module at all. + +__version__ = "0.11.0" + +from .accelerator import Accelerator +from .big_modeling import cpu_offload, disk_offload, dispatch_model, init_empty_weights, load_checkpoint_and_dispatch +from .launchers import debug_launcher, notebook_launcher +from .utils import ( + DeepSpeedPlugin, + DistributedDataParallelKwargs, + DistributedType, + FullyShardedDataParallelPlugin, + GradScalerKwargs, + InitProcessGroupKwargs, + find_executable_batch_size, + infer_auto_device_map, + load_checkpoint_in_model, + synchronize_rng_states, +) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/accelerator.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/accelerator.py new file mode 100644 index 0000000000000000000000000000000000000000..322b81347a7d0e5941fe09733d9a14f55153a89c --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/accelerator.py @@ -0,0 +1,1154 @@ +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import contextlib +import gc +import math +import os +import sys +import warnings +from contextlib import contextmanager +from typing import List, Optional, Union + +import torch + +from .checkpointing import load_accelerator_state, load_custom_state, save_accelerator_state, save_custom_state +from .data_loader import prepare_data_loader +from .logging import get_logger +from .optimizer import AcceleratedOptimizer +from .scheduler import AcceleratedScheduler +from .state import AcceleratorState, GradientState +from .tracking import LOGGER_TYPE_TO_CLASS, GeneralTracker, filter_trackers +from .utils import ( + DeepSpeedPlugin, + DistributedDataParallelKwargs, + DistributedType, + FullyShardedDataParallelPlugin, + GradScalerKwargs, + InitProcessGroupKwargs, + KwargsHandler, + LoggerType, + PrecisionType, + RNGType, + compare_versions, + convert_outputs_to_fp32, + extract_model_from_parallel, + gather, + get_pretty_name, + is_bf16_available, + is_deepspeed_available, + is_torch_version, + is_tpu_available, + pad_across_processes, + reduce, + save, + wait_for_everyone, +) + + +if is_deepspeed_available(): + import deepspeed + + from .utils import ( + DeepSpeedEngineWrapper, + DeepSpeedOptimizerWrapper, + DeepSpeedSchedulerWrapper, + DummyOptim, + DummyScheduler, + ) + +if is_tpu_available(check_device=False): + import torch_xla.distributed.xla_multiprocessing as xmp + +logger = get_logger(__name__) + + +class Accelerator: + """ + Creates an instance of an accelerator for distributed training (on multi-GPU, TPU) or mixed precision training. + + Args: + device_placement (`bool`, *optional*, defaults to `True`): + Whether or not the accelerator should put objects on device (tensors yielded by the dataloader, model, + etc...). + split_batches (`bool`, *optional*, defaults to `False`): + Whether or not the accelerator should split the batches yielded by the dataloaders across the devices. If + `True` the actual batch size used will be the same on any kind of distributed processes, but it must be a + round multiple of the `num_processes` you are using. If `False`, actual batch size used will be the one set + in your script multiplied by the number of processes. + mixed_precision (`str`, *optional*): + Whether or not to use mixed precision training (fp16 or bfloat16). Choose from 'no', 'fp16', 'bf16'. Will + default to the value in the environment variable `MIXED_PRECISION`, which will use the default value in the + accelerate config of the current system or the flag passed with the `accelerate.launch` command. 'fp16' + requires PyTorch 1.6 or higher. 'bf16' requires PyTorch 1.10 or higher. + gradient_accumulation_steps (`int`, *optional*, defaults to 1): + The number of steps that should pass before gradients are accumulated. A number > 1 should be combined with + `Accelerator.accumulate`. + cpu (`bool`, *optional*): + Whether or not to force the script to execute on CPU. Will ignore any available GPU if set to `True` and force + the execution on one process only. + deepspeed_plugin (`DeepSpeedPlugin`, *optional*): + Tweak your DeepSpeed-related args using this argument. This argument is optional and can be configured + directly using *accelerate config* + fsdp_plugin (`FullyShardedDataParallelPlugin`, *optional*): + Tweak your FSDP-related args using this argument.
This argument is optional and can be configured directly + using *accelerate config* + rng_types (list of `str` or [`~utils.RNGType`]): + The list of random number generators to synchronize at the beginning of each iteration in your prepared + dataloaders. Should be one or several of: + + - `"torch"`: the base torch random number generator + - `"cuda"`: the CUDA random number generator (GPU only) + - `"xla"`: the XLA random number generator (TPU only) + - `"generator"`: the `torch.Generator` of the sampler (or batch sampler if there is no sampler in your + dataloader) or of the iterable dataset (if it exists) if the underlying dataset is of that type. + + Will default to `["torch"]` for PyTorch versions <=1.5.1 and `["generator"]` for PyTorch versions >= 1.6. + log_with (list of `str`, [`~utils.LoggerType`] or [`~tracking.GeneralTracker`], *optional*): + A list of loggers to be set up for experiment tracking. Should be one or several of: + + - `"all"` + - `"tensorboard"` + - `"wandb"` + - `"comet_ml"` + If `"all"` is selected, will pick up all available trackers in the environment and initialize them. Can also + accept implementations of `GeneralTracker` for custom trackers, and can be combined with `"all"`. + logging_dir (`str`, `os.PathLike`, *optional*): + A path to a directory for storing logs of locally-compatible loggers. + dispatch_batches (`bool`, *optional*): + If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process + and then the batches are split and broadcast to each process. Will default to `True` for `DataLoader` whose + underlying dataset is an `IterableDataset`, `False` otherwise. + step_scheduler_with_optimizer (`bool`, *optional*, defaults to `True`): + Set `True` if the learning rate scheduler is stepped at the same time as the optimizer, `False` if only + done under certain circumstances (at the end of each epoch, for instance). + kwargs_handlers (`List[KwargsHandler]`, *optional*): + A list of `KwargsHandler` to customize how the objects related to distributed training or mixed precision + are created. See [kwargs](kwargs) for more information. + + Attributes: + + - **device** (`torch.device`) -- The device to use. + - **state** ([`~state.AcceleratorState`]) -- The distributed setup state. + """ + + def __init__( + self, + device_placement: bool = True, + split_batches: bool = False, + fp16: bool = None, + mixed_precision: Union[PrecisionType, str] = None, + gradient_accumulation_steps: int = 1, + cpu: bool = False, + deepspeed_plugin: DeepSpeedPlugin = None, + fsdp_plugin: FullyShardedDataParallelPlugin = None, + rng_types: Optional[List[Union[str, RNGType]]] = None, + log_with: Optional[List[Union[str, LoggerType, GeneralTracker]]] = None, + logging_dir: Optional[Union[str, os.PathLike]] = None, + dispatch_batches: Optional[bool] = None, + step_scheduler_with_optimizer: bool = True, + kwargs_handlers: Optional[List[KwargsHandler]] = None, + ): + self.logging_dir = logging_dir + trackers = filter_trackers(log_with, self.logging_dir) + if len(trackers) < 1 and log_with is not None: + warnings.warn(f"`log_with={log_with}` was passed but no supported trackers are currently installed.") + self.log_with = trackers + + if mixed_precision is not None: + mixed_precision = str(mixed_precision) + if mixed_precision not in PrecisionType: + raise ValueError( + f"Unknown mixed_precision mode: {mixed_precision}. Choose between {PrecisionType.list()}" + ) + + if fp16: + warnings.warn('fp16=True is deprecated.
Use mixed_precision="fp16" instead.', DeprecationWarning) + mixed_precision = "fp16" + + if deepspeed_plugin is None: # init from env variables + deepspeed_plugin = DeepSpeedPlugin() if os.environ.get("USE_DEEPSPEED", "false") == "true" else None + else: + assert isinstance( + deepspeed_plugin, DeepSpeedPlugin + ), "`deepspeed_plugin` must be a DeepSpeedPlugin object." + os.environ["USE_DEEPSPEED"] = "true" # use DeepSpeed if plugin is provided + if deepspeed_plugin: + if not is_deepspeed_available(): + raise ImportError("DeepSpeed is not installed => run `pip install deepspeed` or build it from source.") + if compare_versions("deepspeed", "<", "0.6.5"): + raise ImportError("DeepSpeed version must be >= 0.6.5. Please update DeepSpeed.") + + mixed_precision = os.environ.get("MIXED_PRECISION", "no") if mixed_precision is None else mixed_precision + deepspeed_plugin.set_mixed_precision(mixed_precision) + deepspeed_plugin.set_deepspeed_weakref() + + if os.environ.get("USE_FSDP", "false") == "true" or isinstance(fsdp_plugin, FullyShardedDataParallelPlugin): + if is_torch_version("<", "1.12.0"): + raise ValueError("FSDP requires PyTorch >= 1.12.0") + + if fsdp_plugin is None: # init from env variables + fsdp_plugin = FullyShardedDataParallelPlugin() if os.environ.get("USE_FSDP", "false") == "true" else None + else: + if not isinstance(fsdp_plugin, FullyShardedDataParallelPlugin): + raise TypeError("`fsdp_plugin` must be a FullyShardedDataParallelPlugin object.") + os.environ["USE_FSDP"] = "true" # use FSDP if plugin is provided + + # Kwargs handlers + self.ddp_handler = None + self.scaler_handler = None + self.init_handler = None + if kwargs_handlers is not None: + for handler in kwargs_handlers: + assert isinstance(handler, KwargsHandler), f"Unsupported kwargs handler passed: {handler}." + if isinstance(handler, DistributedDataParallelKwargs): + if self.ddp_handler is not None: + raise ValueError("You can only pass one `DistributedDataParallelKwargs` in `kwargs_handler`.") + else: + self.ddp_handler = handler + elif isinstance(handler, GradScalerKwargs): + if self.scaler_handler is not None: + raise ValueError("You can only pass one `GradScalerKwargs` in `kwargs_handler`.") + else: + self.scaler_handler = handler + elif isinstance(handler, InitProcessGroupKwargs): + if self.init_handler is not None: + raise ValueError("You can only pass one `InitProcessGroupKwargs` in `kwargs_handler`.") + else: + self.init_handler = handler + + kwargs = self.init_handler.to_kwargs() if self.init_handler is not None else {} + self.state = AcceleratorState( + mixed_precision=mixed_precision, + cpu=cpu, + deepspeed_plugin=deepspeed_plugin, + fsdp_plugin=fsdp_plugin, + _from_accelerator=True, + **kwargs, + ) + + if gradient_accumulation_steps > 1: + if self.state.distributed_type == DistributedType.TPU: + raise NotImplementedError( + "Gradient accumulation on TPU is not supported. Pass in `gradient_accumulation_steps=1`" + ) + + self.gradient_accumulation_steps = gradient_accumulation_steps + self.device_placement = device_placement + self.split_batches = split_batches + self.dispatch_batches = dispatch_batches + if dispatch_batches is True and is_torch_version("<", "1.8.0"): + raise ImportError( + f"Using `DataLoaderDispatcher` requires PyTorch 1.8.0 minimum. You have {torch.__version__}."
+ ) + self.step_scheduler_with_optimizer = step_scheduler_with_optimizer + + # Mixed precision attributes + self.scaler = None + self.native_amp = False + err = "{mode} mixed precision requires {requirement}" + if self.state.mixed_precision == "fp16": + self.native_amp = is_torch_version(">=", "1.6") + if not self.native_amp: + raise ValueError(err.format(mode="fp16", requirement="PyTorch >= 1.6")) + if not torch.cuda.is_available(): + raise ValueError(err.format(mode="fp16", requirement="a GPU")) + kwargs = self.scaler_handler.to_kwargs() if self.scaler_handler is not None else {} + if self.distributed_type == DistributedType.FSDP: + from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler + + self.scaler = ShardedGradScaler(**kwargs) + else: + self.scaler = torch.cuda.amp.GradScaler(**kwargs) + elif self.state.mixed_precision == "bf16" and self.distributed_type != DistributedType.FSDP: + self.native_amp = is_bf16_available(True) + if mixed_precision == "bf16" and not self.native_amp and not is_tpu_available(): + raise ValueError(err.format(mode="bf16", requirement="PyTorch >= 1.10 and a supported device.")) + + # Only on the GPU do we care about scaling the gradients + if torch.cuda.is_available(): + kwargs = self.scaler_handler.to_kwargs() if self.scaler_handler is not None else {} + self.scaler = torch.cuda.amp.GradScaler(**kwargs) + + # Start of internal step tracking + self.step = 0 + self.gradient_state = GradientState() + + # Internal references to the training objects + self._optimizers = [] + self._models = [] + self._schedulers = [] + self._custom_objects = [] + + # RNG Types + self.rng_types = rng_types + if self.rng_types is None: + self.rng_types = ["torch"] if is_torch_version("<=", "1.5.1") else ["generator"] + + @property + def use_distributed(self): + return self.distributed_type != DistributedType.NO and self.num_processes > 1 + + @property + def distributed_type(self): + return self.state.distributed_type + + @property + def num_processes(self): + return self.state.num_processes + + @property + def process_index(self): + return self.state.process_index + + @property + def local_process_index(self): + return self.state.local_process_index + + @property + def device(self): + return self.state.device + + @property + def is_main_process(self): + """True for one process only.""" + return self.process_index == 0 + + @property + def is_local_main_process(self): + """True for one process per server.""" + return self.local_process_index == 0 + + @property + def use_fp16(self): + return self.mixed_precision != "no" + + @property + def mixed_precision(self): + if self.distributed_type == DistributedType.DEEPSPEED: + config = self.state.deepspeed_plugin.deepspeed_config + if config.get("fp16", {}).get("enabled", False): + mixed_precision = "fp16" + elif config.get("bf16", {}).get("enabled", False): + mixed_precision = "bf16" + else: + mixed_precision = "no" + else: + mixed_precision = self.state.mixed_precision + return mixed_precision + + @contextmanager + def local_main_process_first(self): + """ + Lets the local main process go first inside a with block. + + The other processes will enter the with block after the main process exits. + """ + yield from self._goes_first(self.is_local_main_process) + + @contextmanager + def main_process_first(self): + """ + Lets the main process go first inside a with block. + + The other processes will enter the with block after the main process exits.
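+ + Example (an illustrative sketch; `load_dataset` stands in for any call that writes to a shared cache and is not part of this module): + + ```python + with accelerator.main_process_first(): + # The main process populates the cache first; the other processes then reuse it. + dataset = load_dataset("imdb") + ```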
+ """ + yield from self._goes_first(self.is_main_process) + + def _goes_first(self, is_main): + if not is_main: + self.wait_for_everyone() + + yield + + if is_main: + self.wait_for_everyone() + + @contextmanager + def no_sync(self, model): + """ + A context manager to disable gradient synchronizations across DDP processes by calling + `torch.nn.parallel.DistributedDataParallel.no_sync`. + + If `model` is not in DDP, this context manager does nothing + + Args: + model (`torch.nn.Module`): + PyTorch Module that was prepared with `Accelerator.prepare` + """ + context = contextlib.nullcontext + if self.use_distributed: + context = getattr(model, "no_sync", context) + + with context(): + yield + + def _do_sync(self): + "Sets the right `sync_gradients` context and either resets or increases `self.step`" + if self.gradient_state.end_of_dataloader: + self.step = 0 + self.gradient_state._set_sync_gradients(True) + else: + self.step += 1 + self.gradient_state._set_sync_gradients((self.step % self.gradient_accumulation_steps) == 0) + + @property + def sync_gradients(self): + return self.gradient_state.sync_gradients + + @contextmanager + def accumulate(self, model): + """ + A context manager that will lightly wrap around and perform gradient accumulation automatically + + Args: + model (`torch.nn.Module`): + PyTorch Module that was prepared with `Accelerator.prepare` + """ + self._do_sync() + if self.sync_gradients: + context = contextlib.nullcontext + else: + context = self.no_sync + + with context(model): + yield + + def print(self, *args, **kwargs): + """ + Use in replacement of `print()` to only print once per server. + """ + if self.is_local_main_process: + print(*args, **kwargs) + + def _prepare_one(self, obj, first_pass=False): + # First pass of preparation: DataLoader, model, optimizer + if isinstance(obj, torch.utils.data.DataLoader) and first_pass: + return self.prepare_data_loader(obj) + elif isinstance(obj, torch.nn.Module) and first_pass: + self._models.append(obj) + return self.prepare_model(obj) + elif isinstance(obj, torch.optim.Optimizer) and first_pass: + optimizer = self.prepare_optimizer(obj) + self._optimizers.append(optimizer) + return optimizer + # Second pass of preparation: LR scheduler (which need the full list of optimizers) + elif isinstance(obj, torch.optim.lr_scheduler._LRScheduler) and not first_pass: + scheduler = self.prepare_scheduler(obj) + self._schedulers.append(scheduler) + return scheduler + else: + return obj + + def _prepare_fsdp(self, *args): + result = [] + for obj in args: + if isinstance(obj, torch.nn.Module): + model = obj + break + optimizers = [] + + self._schedulers = [] + self._models = [] + intermediate_result = [] + for obj in args: + if isinstance(obj, torch.optim.Optimizer): + if len(obj.param_groups) > 1: + logger.warn( + "FSDP Warning: When using FSDP, several parameter groups will be conflated into " + "a single one due to nested module wrapping and parameter flattening." 
+ ) + optimizer = obj.optimizer.__class__(model.parameters(), **obj.optimizer.defaults) + obj = self.prepare_optimizer(optimizer) + optimizers.append(obj) + elif isinstance(obj, torch.nn.Module): + self._models.append(obj) + intermediate_result.append(obj) + + for obj in intermediate_result: + if isinstance(obj, AcceleratedScheduler): + obj.optimizer = optimizers + for i, opt in enumerate(self._optimizers): + if getattr(obj.scheduler, "optimizer", None) == opt.optimizer: + obj.scheduler.optimizer = optimizers[i] + obj.optimizers = [optimizers[i]] + break + self._schedulers.append(obj) + result.append(obj) + self._optimizers = optimizers + return tuple(result) + + def prepare(self, *args): + """ + Prepare all objects passed in `args` for distributed training and mixed precision, then return them in the same + order. + + Accepts the following types of objects: + + - `torch.utils.data.DataLoader`: PyTorch Dataloader + - `torch.nn.Module`: PyTorch Module + - `torch.optim.Optimizer`: PyTorch Optimizer + """ + if self.distributed_type == DistributedType.FSDP: + model_count = 0 + optimizer_present = False + for obj in args: + if isinstance(obj, torch.nn.Module): + model_count += 1 + if isinstance(obj, torch.optim.Optimizer): + optimizer_present = True + if model_count > 1 and optimizer_present: + raise ValueError( + "For FSDP to work with multiple models (>1), " + "prepare must be called for all the models before optimizers are created" + ) + elif model_count == 1 and optimizer_present: + logger.warn( + "FSDP Warning: When using FSDP, " + "it is efficient and recommended to call prepare for the model before creating the optimizer" + ) + + # On TPUs, putting the model on the XLA device will create new parameters, so the corresponding optimizer will + # have parameters disconnected from the model (so no training :-( ). + # If the model and optimizer have parameters on different devices we raise an error. + if self.distributed_type == DistributedType.TPU: + model_device, optimizer_device = self._get_devices() + if model_device is not None and optimizer_device is not None and model_device != optimizer_device: + raise ValueError( + "The model and the optimizer parameters are not on the same device, which probably means you " + "created an optimizer around your model **before** putting it on the device. Make sure the line " + "model.to(device) is before the optimizer creation in your script or remove it entirely and use " + "the flag default value for `device_placement` in your `Accelerator` to let it handle that " + "part for you." + ) + + # If we're dealing with device placement, this deals with that by... + tpu_should_fix_optimizer = self.device_placement and self.distributed_type == DistributedType.TPU + if tpu_should_fix_optimizer: + # 1. grabbing old model parameters + old_named_params = self._get_named_parameters(*args) + + if self.distributed_type == DistributedType.DEEPSPEED: + result = self._prepare_deepspeed(*args) + else: + result = tuple(self._prepare_one(obj, first_pass=True) for obj in args) + result = tuple(self._prepare_one(obj) for obj in result) + + if tpu_should_fix_optimizer: + # 2. grabbing new model parameters + new_named_params = self._get_named_parameters(*result) + # 3. building a map from the first to the second + mapping = {p: new_named_params[n] for n, p in old_named_params.items()} + # 4.
using that map to update the parameters of the optimizer + for obj in result: + if isinstance(obj, torch.optim.Optimizer): + obj._switch_parameters(mapping) + + if self.distributed_type == DistributedType.FSDP and model_count == 1 and optimizer_present: + result = self._prepare_fsdp(*result) + + return result if len(result) > 1 else result[0] + + def prepare_model(self, model): + if self.device_placement and self.distributed_type != DistributedType.FSDP: + model = model.to(self.device) + if self.distributed_type == DistributedType.MULTI_GPU: + kwargs = self.ddp_handler.to_kwargs() if self.ddp_handler is not None else {} + model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=[self.local_process_index], output_device=self.local_process_index, **kwargs + ) + elif self.distributed_type == DistributedType.FSDP: + from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP + + # Check if the model is already an FSDP model due to `Manual Wrapping` and if so, + # don't wrap it again + if type(model) != FSDP: + self.state.fsdp_plugin.set_auto_wrap_policy(model) + fsdp_plugin = self.state.fsdp_plugin + model = FSDP( + model, + sharding_strategy=fsdp_plugin.sharding_strategy, + cpu_offload=fsdp_plugin.cpu_offload, + auto_wrap_policy=fsdp_plugin.auto_wrap_policy, + backward_prefetch=fsdp_plugin.backward_prefetch, + mixed_precision=fsdp_plugin.mixed_precision_policy, + ignored_modules=fsdp_plugin.ignored_modules, + ) + if not fsdp_plugin.cpu_offload.offload_params: + model.to(self.device) + elif self.distributed_type == DistributedType.MULTI_CPU: + kwargs = self.ddp_handler.to_kwargs() if self.ddp_handler is not None else {} + model = torch.nn.parallel.DistributedDataParallel(model, **kwargs) + if self.native_amp: + if self.mixed_precision == "fp16" and is_torch_version(">=", "1.10"): + model.forward = torch.cuda.amp.autocast(dtype=torch.float16)(model.forward) + elif self.mixed_precision == "bf16" and self.distributed_type != DistributedType.TPU: + device_type = "cuda" if torch.cuda.is_available() else "cpu" + model.forward = torch.autocast(device_type=device_type, dtype=torch.bfloat16)(model.forward) + else: + model.forward = torch.cuda.amp.autocast()(model.forward) + model.forward = convert_outputs_to_fp32(model.forward) + if self.distributed_type == DistributedType.TPU and self.state.fork_launched: + model = xmp.MpModelWrapper(model).to(self.device) + return model + + def _prepare_deepspeed(self, *args): + + deepspeed_plugin = self.state.deepspeed_plugin + + result = [ + self._prepare_one(obj, first_pass=True) if isinstance(obj, torch.utils.data.DataLoader) else obj + for obj in args + ] + + batch_sizes = [obj.batch_size for obj in args if hasattr(obj, "batch_size")] + if self.split_batches: + batch_sizes = [batch_size // self.num_processes for batch_size in batch_sizes] + if len(batch_sizes) == 0: + raise ValueError( + "You must specify a training or evaluation dataloader in `accelerate.prepare()` when using DeepSpeed." + ) + + batch_size_per_device = min(batch_sizes) if deepspeed_plugin.is_train_batch_min else max(batch_sizes) + if len(batch_sizes) > 1: + logger.info( + "Since you passed both train and evaluation dataloaders, `is_train_batch_min` (here " + f"{deepspeed_plugin.is_train_batch_min}) will decide the `train_batch_size` ({batch_size_per_device})."
+ ) + + config_kwargs = { + "train_micro_batch_size_per_gpu": batch_size_per_device, + "train_batch_size": batch_size_per_device + * deepspeed_plugin.deepspeed_config["gradient_accumulation_steps"] + * self.num_processes, + "gradient_clipping": 1.0, + "zero_optimization.stage3_gather_16bit_weights_on_model_save": False, + } + + model = None + optimizer = None + scheduler = None + for obj in result: + if isinstance(obj, torch.nn.Module): + model = obj + elif isinstance(obj, (torch.optim.Optimizer, DummyOptim)): + optimizer = obj + elif (isinstance(obj, (torch.optim.lr_scheduler._LRScheduler, DummyScheduler))) or ( + type(obj).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES + ): + scheduler = obj + + if optimizer is not None: + if "optimizer" in deepspeed_plugin.deepspeed_config and not isinstance(optimizer, (DummyOptim)): + raise ValueError( + "You cannot specify an optimizer in the config file and in the code at the same time. " + "Please remove the optimizer from the config file or " + "create `accelerate.utils.DummyOptim` in the code." + ) + elif "optimizer" not in deepspeed_plugin.deepspeed_config and isinstance(optimizer, (DummyOptim)): + raise ValueError( + "You cannot create a `DummyOptim` without specifying an optimizer in the config file." + ) + + if isinstance(optimizer, (torch.optim.Optimizer)): + deepspeed_plugin.deepspeed_config["zero_allow_untested_optimizer"] = True + + if scheduler is not None: + if "scheduler" in deepspeed_plugin.deepspeed_config and not isinstance(scheduler, (DummyScheduler)): + raise ValueError( + "You cannot specify a scheduler in the config file and in the code at the same time. " + "Please remove the scheduler from the config file or " + "create `accelerate.utils.DummyScheduler` in the code." + ) + elif "scheduler" not in deepspeed_plugin.deepspeed_config and isinstance(scheduler, (DummyScheduler)): + raise ValueError( + "You cannot create a `DummyScheduler` without specifying a scheduler in the config file." + ) + + if optimizer is not None and scheduler is not None: + if isinstance(optimizer, (DummyOptim)) and not isinstance(scheduler, (DummyScheduler)): + raise ValueError( + "You can only specify `accelerate.utils.DummyScheduler` in the code when using " + "`accelerate.utils.DummyOptim`." 
+ ) + + if model is not None: + if hasattr(model, "config") and hasattr(model.config, "hidden_size"): + hidden_size = model.config.hidden_size + config_kwargs.update( + { + "zero_optimization.reduce_bucket_size": hidden_size * hidden_size, + "zero_optimization.stage3_prefetch_bucket_size": 0.9 * hidden_size * hidden_size, + "zero_optimization.stage3_param_persistence_threshold": 10 * hidden_size, + } + ) + + if isinstance(optimizer, (DummyOptim)): + config_kwargs.update( + {"optimizer.params.lr": optimizer.lr, "optimizer.params.weight_decay": optimizer.weight_decay} + ) + if isinstance(scheduler, (DummyScheduler)): + config_kwargs.update( + { + "scheduler.params.warmup_min_lr": 0, + "scheduler.params.warmup_max_lr": scheduler.optimizer.lr, + "scheduler.params.warmup_num_steps": scheduler.warmup_num_steps, + } + ) + if scheduler.total_num_steps is not None: + config_kwargs["scheduler.params.total_num_steps"] = ( + math.ceil(scheduler.total_num_steps / self.num_processes) + if not self.split_batches + else scheduler.total_num_steps + ) + deepspeed_plugin.deepspeed_config_process(must_match=False, **config_kwargs) + self.deepspeed_config = deepspeed_plugin.deepspeed_config + kwargs = dict(model=model, config_params=self.deepspeed_config) + if optimizer is not None: + if isinstance(optimizer, (DummyOptim)): + kwargs["model_parameters"] = optimizer.params + else: + kwargs["optimizer"] = optimizer + if scheduler is not None: + if type(scheduler).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES: + kwargs["lr_scheduler"] = scheduler + + engine, optimizer, _, lr_scheduler = deepspeed.initialize(**kwargs) + if optimizer is not None: + optimizer = DeepSpeedOptimizerWrapper(optimizer) + if scheduler is not None: + if lr_scheduler is None: + scheduler = AcceleratedScheduler( + scheduler, + optimizer, + step_with_optimizer=self.step_scheduler_with_optimizer, + split_batches=self.split_batches, + ) + else: + scheduler = DeepSpeedSchedulerWrapper(lr_scheduler, optimizer) + + for i in range(len(result)): + if isinstance(result[i], torch.nn.Module): + result[i] = engine + elif isinstance(result[i], (torch.optim.Optimizer, DummyOptim)): + result[i] = optimizer + elif (isinstance(result[i], (torch.optim.lr_scheduler._LRScheduler, DummyScheduler))) or ( + type(result[i]).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES + ): + result[i] = scheduler + # keep a reference for `deepspeed_engine_wrapped.backward()` + self.deepspeed_engine_wrapped = DeepSpeedEngineWrapper(engine) + self._models.append(engine) + if optimizer is not None: + self._optimizers.append(optimizer) + if scheduler is not None: + self._schedulers.append(scheduler) + if len(self._models) > 1: + raise AssertionError( + "You can't use the same `Accelerator()` instance with multiple models when using DeepSpeed" + ) + return tuple(result) + + def prepare_data_loader(self, data_loader): + return prepare_data_loader( + data_loader, + self.device, + num_processes=self.num_processes, + process_index=self.process_index, + split_batches=self.split_batches, + put_on_device=self.device_placement if self.distributed_type != DistributedType.TPU else False, + rng_types=self.rng_types.copy(), + dispatch_batches=self.dispatch_batches, + ) + + def prepare_optimizer(self, optimizer): + return AcceleratedOptimizer(optimizer, device_placement=self.device_placement, scaler=self.scaler) + + def prepare_scheduler(self, scheduler): + # We try to find the optimizer associated with `scheduler`; the default is the full list.
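+ # If no prepared optimizer wraps `scheduler.optimizer`, the whole list is passed along so the + # scheduler can still be stepped in sync with optimizer steps.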
+ optimizer = self._optimizers + for opt in self._optimizers: + if getattr(scheduler, "optimizer", None) == opt.optimizer: + optimizer = opt + break + + return AcceleratedScheduler( + scheduler, + optimizer, + step_with_optimizer=self.step_scheduler_with_optimizer, + split_batches=self.split_batches, + ) + + def backward(self, loss, **kwargs): + """ + Use `accelerator.backward(loss)` in lieu of `loss.backward()`. + """ + loss /= self.gradient_accumulation_steps + if self.distributed_type == DistributedType.DEEPSPEED: + self.deepspeed_engine_wrapped.backward(loss, **kwargs) + elif self.scaler is not None: + self.scaler.scale(loss).backward(**kwargs) + else: + loss.backward(**kwargs) + + def unscale_gradients(self, optimizer=None): + """ + Unscale the gradients in mixed precision training with AMP. This is a no-op in all other settings. + + Args: + optimizer (`torch.optim.Optimizer` or `List[torch.optim.Optimizer]`, *optional*): + The optimizer(s) for which to unscale gradients. If not set, will unscale gradients on all optimizers + that were passed to [`~Accelerator.prepare`]. + """ + if self.use_fp16 and self.native_amp: + if optimizer is None: + # TODO: this unscales all optimizers, whereas we should only unscale the one(s) holding the given parameters. + optimizer = self._optimizers + elif not isinstance(optimizer, (tuple, list)): + optimizer = [optimizer] + for opt in optimizer: + while isinstance(opt, AcceleratedOptimizer): + opt = opt.optimizer + self.scaler.unscale_(opt) + + def clip_grad_norm_(self, parameters, max_norm, norm_type=2): + """ + Should be used in place of `torch.nn.utils.clip_grad_norm_`. + """ + if self.distributed_type == DistributedType.FSDP: + self.unscale_gradients() + parameters = [p for p in parameters] + for model in self._models: + if parameters == [p for p in model.parameters()]: + model.clip_grad_norm_(max_norm, norm_type) + return + elif self.distributed_type == DistributedType.DEEPSPEED: + # `accelerator.backward(loss)` is doing that automatically. Therefore, its implementation is not needed here + return + self.unscale_gradients() + torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=norm_type) + + def clip_grad_value_(self, parameters, clip_value): + """ + Should be used in place of `torch.nn.utils.clip_grad_value_`. + """ + if self.distributed_type in [DistributedType.DEEPSPEED, DistributedType.FSDP]: + raise Exception("DeepSpeed and FSDP do not support `clip_grad_value_`. Use `clip_grad_norm_` instead.") + self.unscale_gradients() + torch.nn.utils.clip_grad_value_(parameters, clip_value) + + def gather(self, tensor): + """ + Gather the values in *tensor* across all processes and concatenate them on the first dimension. Useful to + regroup the predictions from all processes when doing evaluation. + + Note: + This gather happens in all processes. + + Args: + tensor (`torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`): + The tensors to gather across all processes. + + Returns: + `torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`: The gathered tensor(s). Note that the + first dimension of the result is *num_processes* multiplied by the first dimension of the input tensors. + """ + return gather(tensor) + + def reduce(self, tensor, reduction="sum"): + """ + Reduce the values in *tensor* across all processes based on *reduction*. + + Note: + All processes get the reduced value. + + Args: + tensor (`torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`): + The tensors to reduce across all processes.
+ reduction (`str`, *optional*, defaults to "sum"): + A reduction type; can be one of 'sum', 'mean', or 'none'. If 'none', will not perform any operation. + + Returns: + `torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`: The reduced tensor(s). + """ + return reduce(tensor, reduction) + + def pad_across_processes(self, tensor, dim=0, pad_index=0, pad_first=False): + """ + Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so + they can safely be gathered. + + Args: + tensor (nested list/tuple/dictionary of `torch.Tensor`): + The data to gather. + dim (`int`, *optional*, defaults to 0): + The dimension on which to pad. + pad_index (`int`, *optional*, defaults to 0): + The value with which to pad. + pad_first (`bool`, *optional*, defaults to `False`): + Whether to pad at the beginning or the end. + """ + return pad_across_processes(tensor, dim=dim, pad_index=pad_index, pad_first=pad_first) + + def unwrap_model(self, model): + """ + Unwraps the `model` from the additional layer possibly added by [`~Accelerator.prepare`]. Useful before saving + the model. + + Args: + model (`torch.nn.Module`): + The model to unwrap. + """ + return extract_model_from_parallel(model) + + def wait_for_everyone(self): + """ + Will stop the execution of the current process until every other process has reached that point (so this does + nothing when the script is only run in one process). Useful to do before saving a model. + """ + wait_for_everyone() + + def init_trackers(self, project_name: str, config: Optional[dict] = None): + """ + Initializes a run for all trackers stored in `self.log_with`, potentially with starting configurations. + + Args: + project_name (`str`): + The name of the project. All trackers will save their data based on this name. + config (`dict`, *optional*): + Optional starting configuration to be logged. + """ + self.trackers = [] + for tracker in self.log_with: + if issubclass(type(tracker), GeneralTracker): + # Custom trackers are already initialized + self.trackers.append(tracker) + else: + tracker_init = LOGGER_TYPE_TO_CLASS[str(tracker)] + if getattr(tracker_init, "requires_logging_directory"): + # We can skip this check since it was done in `__init__` + self.trackers.append(tracker_init(project_name, self.logging_dir)) + else: + self.trackers.append(tracker_init(project_name)) + if config is not None: + for tracker in self.trackers: + tracker.store_init_configuration(config) + + def log(self, values: dict, step: Optional[int] = None): + """ + Logs `values` to all stored trackers in `self.trackers`. + + Args: + values (`dict`): + Values should be a dictionary-like object containing only types `int`, `float`, or `str`. + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + """ + if self.is_main_process: + for tracker in self.trackers: + tracker.log(values, step=step) + + def end_training(self): + """ + Runs any special end-of-training behaviors, such as stopping trackers. + """ + if self.is_main_process: + for tracker in self.trackers: + tracker.finish() + + def save(self, obj, f): + """ + Save the object passed to disk once per machine. Use in place of `torch.save`. + + Args: + obj: The object to save. + f (`str` or `os.PathLike`): + Where to save the content of `obj`. + """ + save(obj, f) + + def save_state(self, output_dir: str): + """ + Saves the current states of the model, optimizer, scaler, RNG generators, and registered objects.
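+ The resulting checkpoint can be restored later with [`~Accelerator.load_state`].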
+ + Args: + output_dir (`str` or `os.PathLike`): + The name of the folder to save all relevant weights and states. + """ + # Check if folder exists + output_dir = os.path.expanduser(output_dir) + os.makedirs(output_dir, exist_ok=True) + logger.info(f"Saving current state to {output_dir}") + weights = [self.get_state_dict(m, unwrap=False) for m in self._models] + save_location = save_accelerator_state( + output_dir, weights, self._optimizers, self._schedulers, self.state.process_index, self.scaler + ) + for i, obj in enumerate(self._custom_objects): + save_custom_state(obj, output_dir, i) + return save_location + + def load_state(self, input_dir: str): + """ + Loads the current states of the model, optimizer, scaler, RNG generators, and registered objects. + + Args: + input_dir (`str` or `os.PathLike`): + The name of the folder all relevant weights and states were saved in. + """ + # Check if folder exists + input_dir = os.path.expanduser(input_dir) + if not os.path.isdir(input_dir): + raise ValueError(f"Tried to find {input_dir} but folder does not exist") + logger.info(f"Loading states from {input_dir}") + load_accelerator_state( + input_dir, self._models, self._optimizers, self._schedulers, self.state.process_index, self.scaler + ) + custom_checkpoints = [f for f in os.listdir(input_dir) if "custom_checkpoint" in f] + if len(custom_checkpoints) != len(self._custom_objects): + err = "Warning! Number of found checkpoints does not match the number of registered objects:" + err += f"\n\tFound checkpoints: {len(custom_checkpoints)}" + err += f"\n\tRegistered objects: {len(self._custom_objects)}\nSkipping." + logger.warn(err) + else: + logger.info(f"Loading in {len(custom_checkpoints)} custom states") + for index, obj in enumerate(self._custom_objects): + load_custom_state(obj, input_dir, index) + + def free_memory(self): + """ + Will release all references to the internal objects stored and call the garbage collector. You should call this + method between two trainings with different models/optimizers. + """ + self._schedulers = [] + self._optimizers = [] + self._models = [] + self.deepspeed_engine_wrapped = None + gc.collect() + torch.cuda.empty_cache() + + def clear(self): + """ + Alias for [`Accelerator.free_memory`], releases all references to the internal objects stored and calls the + garbage collector. You should call this method between two trainings with different models/optimizers. + """ + self.free_memory() + + def _get_named_parameters(self, *args): + named_parameters = {} + for obj in args: + if isinstance(obj, torch.nn.Module): + obj = extract_model_from_parallel(obj) + named_parameters.update({n: p for n, p in obj.named_parameters()}) + return named_parameters + + def _get_devices(self, *args): + model_device = None + optimizer_device = None + for obj in args: + # Loop through model parameters and stop at the first one once we have its device. + if isinstance(obj, torch.nn.Module): + for param in obj.parameters(): + model_device = param.device + break + # Loop through optimizer parameter groups and stop at the first one once we have its device.
+ if isinstance(obj, torch.optim.Optimizer): + for param_group in obj.param_groups: + if len(param_group["params"]) > 0: + optimizer_device = param_group["params"][0].device + break + return (model_device, optimizer_device) + + def get_state_dict(self, model, unwrap=True): + is_zero_3 = False + if self.distributed_type == DistributedType.DEEPSPEED: + is_zero_3 = self.deepspeed_config["zero_optimization"]["stage"] == 3 + + if is_zero_3: + if model.zero_gather_16bit_weights_on_model_save(): + state_dict = model._zero3_consolidated_16bit_state_dict() + else: + raise ValueError( + "Cannot get 16bit model weights because `stage3_gather_16bit_weights_on_model_save` in DeepSpeed config is False. " + "To save the model weights in 16bit, set `stage3_gather_16bit_weights_on_model_save` to True in DeepSpeed config file or " + "set `zero3_save_16bit_model` to True when using `accelerate config`. " + "To save the full checkpoint, run `model.save_checkpoint(save_dir)` and use `zero_to_fp32.py` to recover weights." + ) + else: + if unwrap: + model = self.unwrap_model(model) + state_dict = model.state_dict() + + if state_dict is not None: + for k in state_dict: + if state_dict[k].dtype == torch.float16: + state_dict[k] = state_dict[k].float() + + return state_dict + + def register_for_checkpointing(self, *objects): + """ + Makes note of `objects` and will save or load them during `save_state` or `load_state`. + + These should be utilized when the state is being loaded or saved in the same script. It is not designed to be + used across different scripts. + + + + Every `object` must have a `load_state_dict` and `state_dict` function to be stored. + + + """ + invalid_objects = [] + for obj in objects: + if not hasattr(obj, "state_dict") or not hasattr(obj, "load_state_dict"): + invalid_objects.append(obj) + if len(invalid_objects) > 0: + err = "All `objects` must include a `state_dict` and `load_state_dict` function to be stored. The following inputs are invalid:" + for index, obj in enumerate(invalid_objects): + err += f"\n\t- Item at index {index}, `{get_pretty_name(obj)}`" + raise ValueError(err) + self._custom_objects.extend(objects) + + @contextmanager + def autocast(self): + """ + Will apply automatic mixed-precision inside the block under this context manager, if it is enabled. Nothing + different will happen otherwise. + """ + if self.native_amp: + if self.mixed_precision == "fp16" and is_torch_version(">=", "1.10"): + autocast_context = torch.cuda.amp.autocast(dtype=torch.float16) + elif self.mixed_precision == "bf16" and is_bf16_available(): + if self.distributed_type in [DistributedType.NO, DistributedType.MULTI_CPU, DistributedType.MULTI_GPU]: + device_type = "cpu" if not torch.cuda.is_available() else "cuda" + autocast_context = torch.autocast(dtype=torch.bfloat16, device_type=device_type) + else: + autocast_context = torch.cuda.amp.autocast() + + autocast_context.__enter__() + yield + autocast_context.__exit__(*sys.exc_info()) + else: + yield + + @property + def optimizer_step_was_skipped(self): + """ + Whether or not the optimizer update was skipped (because of gradient overflow in mixed precision), in which + case the learning rate should not be changed.
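+ + Example (a minimal sketch of the usual guard around a learning-rate scheduler): + + ```python + accelerator.backward(loss) + optimizer.step() + if not accelerator.optimizer_step_was_skipped: + lr_scheduler.step() + ```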
+ """ + for optimizer in self._optimizers: + if optimizer.step_was_skipped: + return True + return False diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/big_modeling.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/big_modeling.py new file mode 100644 index 0000000000000000000000000000000000000000..85b1f3237d5398e71c20f5da112698b6df429f3b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/big_modeling.py @@ -0,0 +1,324 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from contextlib import contextmanager +from typing import Dict, List, Optional, Union + +import torch +import torch.nn as nn + +from .hooks import AlignDevicesHook, add_hook_to_module, attach_align_device_hook, attach_align_device_hook_on_blocks +from .utils import ( + OffloadedWeightsLoader, + check_device_map, + extract_submodules_state_dict, + infer_auto_device_map, + load_checkpoint_in_model, + offload_state_dict, +) + + +@contextmanager +def init_empty_weights(include_buffers: bool = False): + """ + A context manager under which models are initialized with all parameters on the meta device, therefore creating an + empty model. Useful when just initializing the model would blow the available RAM. + + Args: + include_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to also put all buffers on the meta device while initializing. + + Example: + + ```pyton + import torch.nn as nn + from accelerate import init_empty_weights + + # Initialize a model with 100 billions parameters in no time and without using any RAM. + with init_empty_weights(): + tst = nn.Sequential(*[nn.Linear(10000, 10000) for _ in range(1000)]) + ``` + + + + Any model created under this context manager has no weights. As such you can't do something like + `model.to(some_device)` with it. To load weights inside your empty model, see [`load_checkpoint_and_dispatch`]. 
+ + + """ + old_register_parameter = nn.Module.register_parameter + if include_buffers: + old_register_buffer = nn.Module.register_buffer + + def register_empty_parameter(module, name, param): + old_register_parameter(module, name, param) + if param is not None: + param_cls = type(module._parameters[name]) + kwargs = module._parameters[name].__dict__ + module._parameters[name] = param_cls(module._parameters[name].to(torch.device("meta")), **kwargs) + + def register_empty_buffer(module, name, buffer): + old_register_buffer(module, name, buffer) + if buffer is not None: + module._buffers[name] = module._buffers[name].to(torch.device("meta")) + + try: + nn.Module.register_parameter = register_empty_parameter + if include_buffers: + nn.Module.register_buffer = register_empty_buffer + yield + finally: + nn.Module.register_parameter = old_register_parameter + if include_buffers: + nn.Module.register_buffer = old_register_buffer + + +def cpu_offload( + model: nn.Module, + execution_device: Optional[torch.device] = None, + offload_buffers: bool = False, + state_dict: Optional[Dict[str, torch.Tensor]] = None, + preload_module_classes: Optional[List[str]] = None, +): + """ + Activates full CPU offload for a model. As a result, all parameters of the model will be offloaded and only one + copy of the state dict of the model will be kept. During the forward pass, parameters will be extracted from that + state dict and put on the execution device passed as they are needed, then offloaded again. + + Args: + model (`torch.nn.Module`): + The model to offload. + execution_device (`torch.device`, *optional*): + The device on which the forward pass of the model will be executed (should be a GPU). Will default to the + model first parameter device. + offload_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to offload the buffers with the model parameters. + state_dict (`Dict[str, torch.Tensor]`, *optional*): + The state dict of the model that will be kept on CPU. + preload_module_classes (`List[str]`, *optional*): + A list of classes whose instances should load all their weights (even in the submodules) at the beginning + of the forward. This should only be used for classes that have submodules which are registered but not + called directly during the forward, for instance if a `dense` linear layer is registered, but at forward, + `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly. + """ + if execution_device is None: + execution_device = next(iter(model.parameters())).device + if state_dict is None: + state_dict = {n: p.to("cpu") for n, p in model.state_dict().items()} + attach_align_device_hook( + model, + execution_device=execution_device, + offload=True, + offload_buffers=offload_buffers, + weights_map=state_dict, + preload_module_classes=preload_module_classes, + ) + add_hook_to_module(model, AlignDevicesHook(io_same_device=True)) + return model + + +def disk_offload( + model: nn.Module, + offload_dir: Union[str, os.PathLike], + execution_device: Optional[torch.device] = None, + offload_buffers: bool = False, + preload_module_classes: Optional[List[str]] = None, +): + """ + Activates full disk offload for a model. As a result, all parameters of the model will be offloaded as + memory-mapped array in a given folder. During the forward pass, parameters will be accessed from that folder and + put on the execution device passed as they are needed, then offloaded again. + + Args: + model (`torch.nn.Module`): The model to offload. 
+ offload_dir (`str` or `os.PathLike`): + The folder in which to offload the model weights (or where the model weights are already offloaded). + execution_device (`torch.device`, *optional*): + The device on which the forward pass of the model will be executed (should be a GPU). Will default to the + model's first parameter device. + offload_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to offload the buffers with the model parameters. + preload_module_classes (`List[str]`, *optional*): + A list of classes whose instances should load all their weights (even in the submodules) at the beginning + of the forward. This should only be used for classes that have submodules which are registered but not + called directly during the forward, for instance if a `dense` linear layer is registered, but at forward, + `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly. + """ + if not os.path.isdir(offload_dir) or not os.path.isfile(os.path.join(offload_dir, "index.json")): + offload_state_dict(offload_dir, model.state_dict()) + if execution_device is None: + execution_device = next(iter(model.parameters())).device + weights_map = OffloadedWeightsLoader(save_folder=offload_dir) + attach_align_device_hook( + model, + execution_device=execution_device, + offload=True, + offload_buffers=offload_buffers, + weights_map=weights_map, + preload_module_classes=preload_module_classes, + ) + add_hook_to_module(model, AlignDevicesHook(io_same_device=True)) + return model + + +def dispatch_model( + model: nn.Module, + device_map: Dict[str, Union[str, int, torch.device]], + main_device: Optional[torch.device] = None, + state_dict: Optional[Dict[str, torch.Tensor]] = None, + offload_dir: Union[str, os.PathLike] = None, + offload_buffers: bool = False, + preload_module_classes: Optional[List[str]] = None, +): + """ + Dispatches a model according to a given device map. Layers of the model might be spread across GPUs, offloaded on + the CPU or even the disk. + + Args: + model (`torch.nn.Module`): + The model to dispatch. + device_map (`Dict[str, Union[str, int, torch.device]]`): + A dictionary mapping module names in the model's `state_dict` to the device they should go to. Note that + `"disk"` is accepted even if it's not a proper value for `torch.device`. + main_device (`str`, `int` or `torch.device`, *optional*): + The main execution device. Will default to the first device in the `device_map` different from `"cpu"` or + `"disk"`. + state_dict (`Dict[str, torch.Tensor]`, *optional*): + The state dict of the part of the model that will be kept on CPU. + offload_dir (`str` or `os.PathLike`): + The folder in which to offload the model weights (or where the model weights are already offloaded). + offload_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to offload the buffers with the model parameters. + preload_module_classes (`List[str]`, *optional*): + A list of classes whose instances should load all their weights (even in the submodules) at the beginning + of the forward. This should only be used for classes that have submodules which are registered but not + called directly during the forward, for instance if a `dense` linear layer is registered, but at forward, + `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly. + """ + # Error early if the device map is incomplete.
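+ # (`check_device_map` raises a ValueError if some submodule of `model` has no entry in `device_map`.)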
+    check_device_map(model, device_map)
+
+    if main_device is None:
+        main_device = [d for d in device_map.values() if d not in ["cpu", "disk"]][0]
+
+    cpu_modules = [name for name, device in device_map.items() if device == "cpu"]
+    if state_dict is None and len(cpu_modules) > 0:
+        state_dict = extract_submodules_state_dict(model.state_dict(), cpu_modules)
+
+    disk_modules = [name for name, device in device_map.items() if device == "disk"]
+    if offload_dir is None and len(disk_modules) > 0:
+        raise ValueError(
+            "We need an `offload_dir` to dispatch this model according to this `device_map`; the following submodules "
+            f"need to be offloaded: {', '.join(disk_modules)}."
+        )
+    if len(disk_modules) > 0 and (
+        not os.path.isdir(offload_dir) or not os.path.isfile(os.path.join(offload_dir, "index.json"))
+    ):
+        disk_state_dict = extract_submodules_state_dict(model.state_dict(), disk_modules)
+        offload_state_dict(offload_dir, disk_state_dict)
+
+    execution_device = {
+        name: main_device if device in ["cpu", "disk"] else device for name, device in device_map.items()
+    }
+    offload = {name: device in ["cpu", "disk"] for name, device in device_map.items()}
+    save_folder = offload_dir if len(disk_modules) > 0 else None
+    if state_dict is not None or save_folder is not None:
+        weights_map = OffloadedWeightsLoader(state_dict=state_dict, save_folder=save_folder)
+    else:
+        weights_map = None
+
+    attach_align_device_hook_on_blocks(
+        model,
+        execution_device=execution_device,
+        offload=offload,
+        offload_buffers=offload_buffers,
+        weights_map=weights_map,
+        preload_module_classes=preload_module_classes,
+    )
+    model.hf_device_map = device_map
+    return model
+
+
+def load_checkpoint_and_dispatch(
+    model: nn.Module,
+    checkpoint: Union[str, os.PathLike],
+    device_map: Optional[Union[str, Dict[str, Union[int, str, torch.device]]]] = None,
+    max_memory: Optional[Dict[Union[int, str], Union[int, str]]] = None,
+    no_split_module_classes: Optional[List[str]] = None,
+    offload_folder: Optional[Union[str, os.PathLike]] = None,
+    offload_buffers: bool = False,
+    dtype: Optional[Union[str, torch.dtype]] = None,
+    offload_state_dict: bool = False,
+    preload_module_classes: Optional[List[str]] = None,
+):
+    """
+    Loads a (potentially sharded) checkpoint inside a model, potentially sending weights to a given device as they are
+    loaded, and adds the various hooks that will make this model run properly (even if split across devices).
+
+    Args:
+        model (`torch.nn.Module`): The model in which we want to load a checkpoint.
+        checkpoint (`str` or `os.PathLike`):
+            The folder checkpoint to load. It can be:
+            - a path to a file containing a whole model state dict
+            - a path to a `.json` file containing the index to a sharded checkpoint
+            - a path to a folder containing a unique `.index.json` file and the shards of a checkpoint.
+        device_map (`Dict[str, Union[int, str, torch.device]]`, *optional*):
+            A map that specifies where each submodule should go. It doesn't need to be refined to each parameter/buffer
+            name; once a given module name is inside it, every submodule of it will be sent to the same device.
+
+            To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`.
+        max_memory (`Dict`, *optional*):
+            A dictionary mapping device identifiers to maximum memory. Will default to the maximum memory available for
+            each GPU and the available CPU RAM if unset.
+        no_split_module_classes (`List[str]`, *optional*):
+            A list of layer class names that should never be split across devices (for instance any layer that has a
+            residual connection).
+        offload_folder (`str` or `os.PathLike`, *optional*):
+            If the `device_map` contains any value `"disk"`, the folder where we will offload weights.
+        offload_buffers (`bool`, *optional*, defaults to `False`):
+            In the layers that are offloaded on the CPU or the hard drive, whether or not to offload the buffers as
+            well as the parameters.
+        dtype (`str` or `torch.dtype`, *optional*):
+            If provided, the weights will be converted to that type when loaded.
+        offload_state_dict (`bool`, *optional*, defaults to `False`):
+            If `True`, will temporarily offload the CPU state dict on the hard drive to avoid running out of CPU RAM
+            if the weight of the CPU state dict + the biggest shard does not fit.
+        preload_module_classes (`List[str]`, *optional*):
+            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
+            of the forward. This should only be used for classes that have submodules which are registered but not
+            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
+            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
+    """
+    if device_map == "auto":
+        device_map = infer_auto_device_map(
+            model, max_memory=max_memory, no_split_module_classes=no_split_module_classes, dtype=dtype
+        )
+    load_checkpoint_in_model(
+        model,
+        checkpoint,
+        device_map=device_map,
+        offload_folder=offload_folder,
+        dtype=dtype,
+        offload_state_dict=offload_state_dict,
+    )
+    if device_map is None:
+        return model
+    return dispatch_model(
+        model,
+        device_map=device_map,
+        offload_dir=offload_folder,
+        offload_buffers=offload_buffers,
+        preload_module_classes=preload_module_classes,
+    )
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/checkpointing.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/checkpointing.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5e816aab1019fc40f744112ca20d926efe35c86
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/checkpointing.py
@@ -0,0 +1,185 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import random
+from pathlib import Path
+from typing import List
+
+import numpy as np
+import torch
+from torch.cuda.amp import GradScaler
+
+from .utils import (
+    MODEL_NAME,
+    OPTIMIZER_NAME,
+    RNG_STATE_NAME,
+    SCALER_NAME,
+    SCHEDULER_NAME,
+    get_pretty_name,
+    is_tpu_available,
+    save,
+)
+
+
+if is_tpu_available(check_device=False):
+    import torch_xla.core.xla_model as xm
+
+from .logging import get_logger
+
+
+logger = get_logger(__name__)
+
+
+def save_accelerator_state(
+    output_dir: str,
+    model_states: List[dict],
+    optimizers: list,
+    schedulers: list,
+    process_index: int,
+    scaler: GradScaler = None,
+):
+    """
+    Saves the current states of the models, optimizers, scaler, and RNG generators to a given directory.
+
+    Args:
+        output_dir (`str` or `os.PathLike`):
+            The name of the folder to save all relevant weights and states.
+        model_states (`List[dict]`):
+            A list of model state dicts.
+        optimizers (`List[torch.optim.Optimizer]`):
+            A list of optimizer instances.
+        schedulers (`List[torch.optim.lr_scheduler._LRScheduler]`):
+            A list of learning rate schedulers.
+        process_index (`int`):
+            The current process index in the Accelerator state.
+        scaler (`torch.cuda.amp.GradScaler`, *optional*):
+            An optional gradient scaler instance to save.
+    """
+    # Model states
+    for i, state in enumerate(model_states):
+        weights_name = f"{MODEL_NAME}.bin" if i == 0 else f"{MODEL_NAME}_{i}.bin"
+        output_model_file = os.path.join(output_dir, weights_name)
+        save(state, output_model_file)
+        logger.info(f"Model weights saved in {output_model_file}")
+    # Optimizer states
+    for i, opt in enumerate(optimizers):
+        state = opt.state_dict()
+        optimizer_name = f"{OPTIMIZER_NAME}.bin" if i == 0 else f"{OPTIMIZER_NAME}_{i}.bin"
+        output_optimizer_file = os.path.join(output_dir, optimizer_name)
+        save(state, output_optimizer_file)
+        logger.info(f"Optimizer state saved in {output_optimizer_file}")
+    # Scheduler states
+    for i, scheduler in enumerate(schedulers):
+        state = scheduler.state_dict()
+        scheduler_name = f"{SCHEDULER_NAME}.bin" if i == 0 else f"{SCHEDULER_NAME}_{i}.bin"
+        output_scheduler_file = os.path.join(output_dir, scheduler_name)
+        save(state, output_scheduler_file)
+        logger.info(f"Scheduler state saved in {output_scheduler_file}")
+    # GradScaler state
+    if scaler is not None:
+        state = scaler.state_dict()
+        output_scaler_file = os.path.join(output_dir, SCALER_NAME)
+        torch.save(state, output_scaler_file)
+        logger.info(f"Gradient scaler state saved in {output_scaler_file}")
+    # Random number generator states
+    states = {}
+    states_name = f"{RNG_STATE_NAME}_{process_index}.pkl"
+    states["random_state"] = random.getstate()
+    states["numpy_random_seed"] = np.random.get_state()
+    states["torch_manual_seed"] = torch.get_rng_state()
+    states["torch_cuda_manual_seed"] = torch.cuda.get_rng_state_all()
+    # ^^ safe to call this function even if cuda is not available
+    if is_tpu_available():
+        states["xm_seed"] = xm.get_rng_state()
+    output_states_file = os.path.join(output_dir, states_name)
+    torch.save(states, output_states_file)
+    logger.info(f"Random states saved in {output_states_file}")
+    return output_dir
+
+
+def load_accelerator_state(input_dir, models, optimizers, schedulers, process_index, scaler=None):
+    """
+    Loads states of the models, optimizers, scaler, and RNG generators from a given directory.
+
+    Args:
+        input_dir (`str` or `os.PathLike`):
+            The name of the folder to load all relevant weights and states.
+        models (`List[torch.nn.Module]`):
+            A list of model instances.
+        optimizers (`List[torch.optim.Optimizer]`):
+            A list of optimizer instances.
+        schedulers (`List[torch.optim.lr_scheduler._LRScheduler]`):
+            A list of learning rate schedulers.
+        process_index (`int`):
+            The current process index in the Accelerator state.
+        scaler (`torch.cuda.amp.GradScaler`, *optional*):
+            An optional *GradScaler* instance to load.
+    """
+    # Model states
+    for i, model in enumerate(models):
+        weights_name = f"{MODEL_NAME}.bin" if i == 0 else f"{MODEL_NAME}_{i}.bin"
+        input_model_file = os.path.join(input_dir, weights_name)
+        models[i].load_state_dict(torch.load(input_model_file, map_location="cpu"))
+    logger.info("All model weights loaded successfully")
+
+    # Optimizer states
+    for i, opt in enumerate(optimizers):
+        optimizer_name = f"{OPTIMIZER_NAME}.bin" if i == 0 else f"{OPTIMIZER_NAME}_{i}.bin"
+        input_optimizer_file = os.path.join(input_dir, optimizer_name)
+        optimizers[i].load_state_dict(torch.load(input_optimizer_file, map_location="cpu"))
+    logger.info("All optimizer states loaded successfully")
+
+    # Scheduler states
+    for i, scheduler in enumerate(schedulers):
+        scheduler_name = f"{SCHEDULER_NAME}.bin" if i == 0 else f"{SCHEDULER_NAME}_{i}.bin"
+        input_scheduler_file = os.path.join(input_dir, scheduler_name)
+        scheduler.load_state_dict(torch.load(input_scheduler_file))
+    logger.info("All scheduler states loaded successfully")
+
+    # GradScaler state
+    if scaler is not None:
+        input_scaler_file = os.path.join(input_dir, SCALER_NAME)
+        scaler.load_state_dict(torch.load(input_scaler_file))
+        logger.info("GradScaler state loaded successfully")
+
+    # Random states
+    states = torch.load(os.path.join(input_dir, f"{RNG_STATE_NAME}_{process_index}.pkl"))
+    random.setstate(states["random_state"])
+    np.random.set_state(states["numpy_random_seed"])
+    torch.set_rng_state(states["torch_manual_seed"])
+    torch.cuda.set_rng_state_all(states["torch_cuda_manual_seed"])
+    # ^^ safe to call this function even if cuda is not available
+    if is_tpu_available():
+        xm.set_rng_state(states["xm_seed"])
+    logger.info("All random states loaded successfully")
+
+
+def save_custom_state(obj, path, index: int = 0):
+    """
+    Saves the state of `obj` to `{path}/custom_checkpoint_{index}.pkl`
+    """
+    # TODO: is this the right way to get a qualified name for `obj`?
+    save_location = Path(path) / f"custom_checkpoint_{index}.pkl"
+    logger.info(f"Saving the state of {get_pretty_name(obj)} to {save_location}")
+    torch.save(obj.state_dict(), save_location)
+
+
+def load_custom_state(obj, path, index: int = 0):
+    """
+    Loads the state of `obj` at `{path}/custom_checkpoint_{index}.pkl`
+    """
+    load_location = f"{path}/custom_checkpoint_{index}.pkl"
+    logger.info(f"Loading the state of {get_pretty_name(obj)} from {load_location}")
+    obj.load_state_dict(torch.load(load_location))
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/data_loader.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/data_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..fddfe8955331ec20cbc03e1f86c05ec8c1f4c2ef
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/data_loader.py
@@ -0,0 +1,654 @@
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from typing import List, Optional, Union
+
+import torch
+from torch.utils.data import BatchSampler, DataLoader, IterableDataset
+
+from .logging import get_logger
+from .state import AcceleratorState, DistributedType, GradientState, is_tpu_available
+from .utils import (
+    RNGType,
+    broadcast,
+    broadcast_object_list,
+    concatenate,
+    find_batch_size,
+    get_data_structure,
+    initialize_tensors,
+    is_torch_version,
+    send_to_device,
+    slice_tensors,
+    synchronize_rng_states,
+)
+
+
+if is_tpu_available(check_device=False):
+    import torch_xla.distributed.parallel_loader as xpl
+
+    class MpDeviceLoaderWrapper(xpl.MpDeviceLoader):
+        """
+        Wrapper for the xpl.MpDeviceLoader class that knows the total batch size.
+
+        **Available attributes:**
+
+        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
+          Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
+          number of processes
+        """
+
+        @property
+        def total_batch_size(self):
+            return self._loader.total_batch_size
+
+
+logger = get_logger(__name__)
+
+# Default kwargs of `DataLoader` in the minimum supported version (1.4.0).
+_PYTORCH_DATALOADER_KWARGS = {
+    "batch_size": 1,
+    "shuffle": False,
+    "sampler": None,
+    "batch_sampler": None,
+    "num_workers": 0,
+    "collate_fn": None,
+    "pin_memory": False,
+    "drop_last": False,
+    "timeout": 0,
+    "worker_init_fn": None,
+    "multiprocessing_context": None,
+}
+
+# kwargs added in later PyTorch versions, keyed by the version that introduced them.
+_PYTORCH_DATALOADER_ADDITIONAL_KWARGS = {
+    "1.6.0": {"generator": None},
+    "1.7.0": {"prefetch_factor": 2, "persistent_workers": False},
+}
+
+for v, additional_kwargs in _PYTORCH_DATALOADER_ADDITIONAL_KWARGS.items():
+    if is_torch_version(">=", v):
+        _PYTORCH_DATALOADER_KWARGS.update(additional_kwargs)
+
+
+class BatchSamplerShard(BatchSampler):
+    """
+    Wraps a PyTorch `BatchSampler` to generate batches for one of the processes only. Instances of this class will
+    always yield a number of batches that is a round multiple of `num_processes` and that all have the same size.
+    Depending on the value of the `drop_last` attribute of the batch sampler passed, it will either stop the iteration
+    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.
+
+    Args:
+        batch_sampler (`torch.utils.data.sampler.BatchSampler`):
+            The batch sampler to split in several shards.
+        num_processes (`int`, *optional*, defaults to 1):
+            The number of processes running concurrently.
+        process_index (`int`, *optional*, defaults to 0):
+            The index of the current process.
+        split_batches (`bool`, *optional*, defaults to `False`):
+            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
+            yielding different full batches on each process.
+
+            On two processes with a sampler of `[[0, 1, 2, 3], [4, 5, 6, 7]]`, this will result in:
+
+            - the sampler on process 0 to yield `[0, 1, 2, 3]` and the sampler on process 1 to yield `[4, 5, 6, 7]` if
+              this argument is set to `False`.
+            - the sampler on process 0 to yield `[0, 1]` then `[4, 5]` and the sampler on process 1 to yield `[2, 3]`
+              then `[6, 7]` if this argument is set to `True`.
+
+
+    This does not support `BatchSampler` with varying batch size yet.
+
+    """
+
+    def __init__(
+        self,
+        batch_sampler: BatchSampler,
+        num_processes: int = 1,
+        process_index: int = 0,
+        split_batches: bool = False,
+    ):
+        if split_batches and batch_sampler.batch_size % num_processes != 0:
+            raise ValueError(
+                f"To use `BatchSamplerShard` in `split_batches` mode, the batch size ({batch_sampler.batch_size}) "
+                f"needs to be a round multiple of the number of processes ({num_processes})."
+            )
+        self.batch_sampler = batch_sampler
+        self.num_processes = num_processes
+        self.process_index = process_index
+        self.split_batches = split_batches
+        self.batch_size = batch_sampler.batch_size
+        self.drop_last = batch_sampler.drop_last
+
+    def __len__(self):
+        if self.split_batches:
+            return len(self.batch_sampler)
+        if len(self.batch_sampler) % self.num_processes == 0:
+            return len(self.batch_sampler) // self.num_processes
+        length = len(self.batch_sampler) // self.num_processes
+        return length if self.drop_last else length + 1
+
+    def __iter__(self):
+        return self._iter_with_split() if self.split_batches else self._iter_with_no_split()
+
+    def _iter_with_split(self):
+        initial_data = []
+        batch_length = self.batch_sampler.batch_size // self.num_processes
+        for idx, batch in enumerate(self.batch_sampler):
+            if idx == 0:
+                initial_data = batch
+            if len(batch) == self.batch_size:
+                # If the batch is full, we yield the part of it this process is responsible for.
+                yield batch[batch_length * self.process_index : batch_length * (self.process_index + 1)]
+
+        # If drop_last is True or the last batch was full, iteration is over, otherwise...
+        if not self.drop_last and len(initial_data) > 0 and len(batch) < self.batch_size:
+            # For degenerate cases where the dataset has less than num_process * batch_size samples
+            while len(initial_data) < self.batch_size:
+                initial_data += initial_data
+            batch = batch + initial_data
+            yield batch[batch_length * self.process_index : batch_length * (self.process_index + 1)]
+
+    def _iter_with_no_split(self):
+        initial_data = []
+        batch_to_yield = []
+        for idx, batch in enumerate(self.batch_sampler):
+            # We gather the initial indices in case we need to circle back at the end.
+            if not self.drop_last and idx < self.num_processes:
+                initial_data += batch
+            # We identify the batch to yield but wait until we are sure every process gets a full batch before
+            # actually yielding it.
+            if idx % self.num_processes == self.process_index:
+                batch_to_yield = batch
+            if idx % self.num_processes == self.num_processes - 1 and len(batch) == self.batch_size:
+                yield batch_to_yield
+                batch_to_yield = []
+
+        # If drop_last is True, iteration is over, otherwise...
+        if not self.drop_last and len(initial_data) > 0:
+            # ... we yield the complete batch we had saved before if it has the proper length
+            if len(batch_to_yield) == self.batch_size:
+                yield batch_to_yield
+
+            # For degenerate cases where the dataset has less than num_process * batch_size samples
+            while len(initial_data) < self.num_processes * self.batch_size:
+                initial_data += initial_data
+
+            # If the last batch seen was of the proper size, it has been yielded by its process so we move to the next
+            if len(batch) == self.batch_size:
+                batch = []
+                idx += 1
+
+            # Make sure we yield a multiple of self.num_processes batches
+            cycle_index = 0
+            while idx % self.num_processes != 0 or len(batch) > 0:
+                end_index = cycle_index + self.batch_size - len(batch)
+                batch += initial_data[cycle_index:end_index]
+                if idx % self.num_processes == self.process_index:
+                    yield batch
+                cycle_index = end_index
+                batch = []
+                idx += 1
+
+
+class IterableDatasetShard(IterableDataset):
+    """
+    Wraps a PyTorch `IterableDataset` to generate samples for one of the processes only. Instances of this class will
+    always yield a number of samples that is a round multiple of the actual batch size (depending on the value of
+    `split_batches`, this is either `batch_size` or `batch_size x num_processes`). Depending on the value of the
+    `drop_last` attribute of the batch sampler passed, it will either stop the iteration at the first batch that would
+    be too small or loop with indices from the beginning.
+
+    Args:
+        dataset (`torch.utils.data.dataset.IterableDataset`):
+            The iterable dataset to split in several shards.
+        batch_size (`int`, *optional*, defaults to 1):
+            The size of the batches per shard (if `split_batches=False`) or the size of the batches (if
+            `split_batches=True`).
+        drop_last (`bool`, *optional*, defaults to `False`):
+            Whether or not to drop the last incomplete batch or complete the last batches by using the samples from the
+            beginning.
+        num_processes (`int`, *optional*, defaults to 1):
+            The number of processes running concurrently.
+        process_index (`int`, *optional*, defaults to 0):
+            The index of the current process.
+        split_batches (`bool`, *optional*, defaults to `False`):
+            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
+            yielding different full batches on each process.
+
+            On two processes with an iterable dataset yielding `[0, 1, 2, 3, 4, 5, 6, 7]`, this will result in:
+
+            - the shard on process 0 to yield `[0, 1, 2, 3]` and the shard on process 1 to yield `[4, 5, 6, 7]` if this
+              argument is set to `False`.
+            - the shard on process 0 to yield `[0, 1, 4, 5]` and the shard on process 1 to yield `[2, 3, 6, 7]` if
+              this argument is set to `True`.
+    """
+
+    def __init__(
+        self,
+        dataset: IterableDataset,
+        batch_size: int = 1,
+        drop_last: bool = False,
+        num_processes: int = 1,
+        process_index: int = 0,
+        split_batches: bool = False,
+    ):
+        if split_batches and batch_size > 1 and batch_size % num_processes != 0:
+            raise ValueError(
+                f"To use `IterableDatasetShard` in `split_batches` mode, the batch size ({batch_size}) "
+                f"needs to be a round multiple of the number of processes ({num_processes})."
+            )
+        self.dataset = dataset
+        self.batch_size = batch_size
+        self.drop_last = drop_last
+        self.num_processes = num_processes
+        self.process_index = process_index
+        self.split_batches = split_batches
+
+    def __iter__(self):
+        real_batch_size = self.batch_size if self.split_batches else (self.batch_size * self.num_processes)
+        process_batch_size = (self.batch_size // self.num_processes) if self.split_batches else self.batch_size
+        process_slice = range(self.process_index * process_batch_size, (self.process_index + 1) * process_batch_size)
+
+        first_batch = None
+        current_batch = []
+        for element in self.dataset:
+            current_batch.append(element)
+            # Wait to have a full batch before yielding elements.
+            if len(current_batch) == real_batch_size:
+                for i in process_slice:
+                    yield current_batch[i]
+                if first_batch is None:
+                    first_batch = current_batch.copy()
+                current_batch = []
+
+        # Finished if drop_last is True, otherwise complete the last batch with elements from the beginning.
+        if not self.drop_last and len(current_batch) > 0:
+            if first_batch is None:
+                first_batch = current_batch.copy()
+            while len(current_batch) < real_batch_size:
+                current_batch += first_batch
+            for i in process_slice:
+                yield current_batch[i]
+
+
+class DataLoaderShard(DataLoader):
+    """
+    Subclass of a PyTorch `DataLoader` that will deal with device placement and current distributed setup.
+
+    Args:
+        dataset (`torch.utils.data.dataset.Dataset`):
+            The dataset to use to build this dataloader.
+        device (`torch.device`, *optional*):
+            If passed, the device to put all batches on.
+        rng_types (list of `str` or [`~utils.RNGType`]):
+            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
+            several of:
+
+            - `"torch"`: the base torch random number generator
+            - `"cuda"`: the CUDA random number generator (GPU only)
+            - `"xla"`: the XLA random number generator (TPU only)
+            - `"generator"`: an optional `torch.Generator`
+        generator (`torch.Generator`, *optional*):
+            A random number generator to keep synchronized across processes.
+        kwargs:
+            All other keyword arguments to pass to the regular `DataLoader` initialization.
+
+    **Available attributes:**
+
+    - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
+      Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
+      number of processes
+    """
+
+    def __init__(self, dataset, device=None, rng_types=None, generator=None, **kwargs):
+        super().__init__(dataset, **kwargs)
+        self.device = device
+        self.rng_types = rng_types
+        self.generator = generator
+        self.gradient_state = GradientState()
+
+    def __iter__(self):
+        if self.rng_types is not None:
+            synchronize_rng_states(self.rng_types, self.generator)
+        self.gradient_state._set_end_of_dataloader(False)
+        dataloader_iter = super().__iter__()
+        # We iterate one batch ahead to check when we are at the end
+        try:
+            current_batch = next(dataloader_iter)
+        except StopIteration:
+            yield
+        while True:
+            try:
+                # But we still move it to the device so it is done before `StopIteration` is reached
+                if self.device is not None:
+                    current_batch = send_to_device(current_batch, self.device)
+                next_batch = next(dataloader_iter)
+                yield current_batch
+                current_batch = next_batch
+            except StopIteration:
+                self.gradient_state._set_end_of_dataloader(True)
+                yield current_batch
+                break
+
+    @property
+    def total_batch_size(self):
+        return (
+            self.batch_sampler.batch_size
+            if self.batch_sampler.split_batches
+            else (self.batch_sampler.batch_size * self.batch_sampler.num_processes)
+        )
+
+
+class DataLoaderDispatcher(DataLoader):
+    """
+    Subclass of a PyTorch `DataLoader` that will iterate and preprocess on process 0 only, then dispatch on each
+    process their part of the batch.
+
+    Args:
+        split_batches (`bool`, *optional*, defaults to `False`):
+            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
+            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing by
+            `num_processes` batches at each iteration). Another way to see this is that the observed batch size will be
+            the same as the initial `dataloader` if this option is set to `True`, the batch size of the initial
+            `dataloader` multiplied by `num_processes` otherwise. Setting this option to `True` requires that the batch
+            size of the `dataloader` is a round multiple of the number of processes.
+
+    **Available attributes:**
+
+    - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
+      Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
+      number of processes
+    """
+
+    def __init__(self, dataset, split_batches: bool = False, **kwargs):
+        shuffle = False
+        if is_torch_version(">=", "1.11.0"):
+            from torch.utils.data.datapipes.iter.combinatorics import ShufflerIterDataPipe
+
+            # We need to save the shuffling state of the DataPipe
+            if isinstance(dataset, ShufflerIterDataPipe):
+                shuffle = dataset._shuffle_enabled
+        super().__init__(dataset, **kwargs)
+        self.split_batches = split_batches
+        if is_torch_version("<", "1.8.0"):
+            raise ImportError(
+                f"Using `DataLoaderDispatcher` requires PyTorch 1.8.0 minimum. You have {torch.__version__}."
+            )
+        if shuffle:
+            torch.utils.data.graph_settings.apply_shuffle_settings(dataset, shuffle=shuffle)
+
+        self.gradient_state = GradientState()
+        self.state = AcceleratorState()
+
+    def _fetch_batches(self, iterator):
+        batches, batch = None, None
+        # On process 0, we gather the batch to dispatch.
+        if self.state.process_index == 0:
+            try:
+                if self.split_batches:
+                    # One batch of the main iterator is dispatched and split.
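+                    # (Illustrative sizes, not from the source: with 2 processes and `split_batches=True`, a batch
+                    # of 8 samples fetched here is later sliced in `__iter__` into two shares of 4, one per process.)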
+                    batch = next(iterator)
+                else:
+                    # num_processes batches of the main iterator are concatenated then dispatched and split.
+                    # We add the batches one by one so we have the remainder available when drop_last=False.
+                    batches = []
+                    for _ in range(self.state.num_processes):
+                        batches.append(next(iterator))
+                    batch = concatenate(batches, dim=0)
+                # In both cases, we need to get the structure of the batch that we will broadcast on other
+                # processes to initialize the tensors with the right shape.
+                # data_structure, stop_iteration
+                batch_info = [get_data_structure(batch), False]
+            except StopIteration:
+                batch_info = [None, True]
+        else:
+            batch_info = [None, self._stop_iteration]
+        # This is inplace, so after this instruction, every process has the same `batch_info` as process 0.
+        broadcast_object_list(batch_info)
+        self._stop_iteration = batch_info[1]
+        if self._stop_iteration:
+            # If drop_last is False and split_batches is False, we may have a remainder to take care of.
+            if not self.split_batches and not self.drop_last:
+                if self.state.process_index == 0 and len(batches) > 0:
+                    batch = concatenate(batches, dim=0)
+                    batch_info = [get_data_structure(batch), False]
+                else:
+                    batch_info = [None, True]
+                broadcast_object_list(batch_info)
+                if batch_info[1]:
+                    return batch, batch_info, True
+            else:
+                return batch, batch_info, True
+        return batch, batch_info, False
+
+    def __iter__(self):
+        self.gradient_state._set_end_of_dataloader(False)
+        main_iterator = None
+        if self.state.process_index == 0:
+            # We only iterate through the DataLoader on process 0.
+            main_iterator = super().__iter__()
+        self._stop_iteration = False
+        first_batch = None
+        batch, batch_info, skip = self._fetch_batches(main_iterator)
+        while True:
+            if skip:
+                continue
+            if self.state.process_index != 0:
+                # Initialize tensors on the processes other than process 0.
+                batch = initialize_tensors(batch_info[0])
+            batch = send_to_device(batch, self.state.device)
+            # Broadcast the batch before splitting it.
+            batch = broadcast(batch, from_process=0)
+
+            if not self.drop_last and first_batch is None:
+                # We keep at least num_processes elements of the first batch to be able to complete the last batch
+                first_batch = slice_tensors(batch, slice(0, self.state.num_processes))
+
+            observed_batch_size = find_batch_size(batch)
+            batch_size = observed_batch_size // self.state.num_processes
+
+            if not self.drop_last and self._stop_iteration and observed_batch_size % self.state.num_processes != 0:
+                # If the last batch is not complete, let's add the first batch to it.
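+                # Illustrative sizes (not from the source): with 3 processes and a final batch of 4 samples,
+                # `batch_size` is 4 // 3 = 1; appending the saved `first_batch` (3 samples) makes 7 samples and
+                # bumps `batch_size` to 2, so the slices [0:2], [2:4] and [4:6] give every process a full share.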
+                batch = concatenate([batch, first_batch], dim=0)
+                batch_size += 1
+
+            data_slice = slice(self.state.process_index * batch_size, (self.state.process_index + 1) * batch_size)
+            next_batch, next_batch_info, next_skip = self._fetch_batches(main_iterator)
+            if not self._stop_iteration:
+                yield slice_tensors(batch, data_slice)
+                batch, batch_info, skip = next_batch, next_batch_info, next_skip
+            else:
+                self.gradient_state._set_end_of_dataloader(True)
+                yield slice_tensors(batch, data_slice)
+                break
+
+    def __len__(self):
+        whole_length = super().__len__()
+        if self.split_batches:
+            return whole_length
+        elif self.drop_last:
+            return whole_length // self.state.num_processes
+        else:
+            return math.ceil(whole_length / self.state.num_processes)
+
+    @property
+    def total_batch_size(self):
+        return (
+            self.dataset.batch_size if self.split_batches else (self.dataset.batch_size * self.dataset.num_processes)
+        )
+
+
+def prepare_data_loader(
+    dataloader: DataLoader,
+    device: Optional[torch.device] = None,
+    num_processes: Optional[int] = None,
+    process_index: Optional[int] = None,
+    split_batches: bool = False,
+    put_on_device: bool = False,
+    rng_types: Optional[List[Union[str, RNGType]]] = None,
+    dispatch_batches: Optional[bool] = None,
+) -> DataLoader:
+    """
+    Wraps a PyTorch `DataLoader` to generate batches for one of the processes only.
+
+    Depending on the value of the `drop_last` attribute of the `dataloader` passed, it will either stop the iteration
+    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.
+
+    Args:
+        dataloader (`torch.utils.data.dataloader.DataLoader`):
+            The data loader to split across several devices.
+        device (`torch.device`):
+            The target device for the returned `DataLoader`.
+        num_processes (`int`, *optional*):
+            The number of processes running concurrently. Will default to the value given by
+            [`~state.AcceleratorState`].
+        process_index (`int`, *optional*):
+            The index of the current process. Will default to the value given by [`~state.AcceleratorState`].
+        split_batches (`bool`, *optional*, defaults to `False`):
+            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
+            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing by
+            `num_processes` batches at each iteration).
+
+            Another way to see this is that the observed batch size will be the same as the initial `dataloader` if
+            this option is set to `True`, the batch size of the initial `dataloader` multiplied by `num_processes`
+            otherwise.
+
+            Setting this option to `True` requires that the batch size of the `dataloader` is a round multiple of the
+            number of processes.
+        put_on_device (`bool`, *optional*, defaults to `False`):
+            Whether or not to put the batches on `device` (only works if the batches are nested list, tuples or
+            dictionaries of tensors).
+        rng_types (list of `str` or [`~utils.RNGType`]):
+            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
+            several of:
+
+            - `"torch"`: the base torch random number generator
+            - `"cuda"`: the CUDA random number generator (GPU only)
+            - `"xla"`: the XLA random number generator (TPU only)
+            - `"generator"`: the `torch.Generator` of the sampler (or batch sampler if there is no sampler in your
+              dataloader) or of the iterable dataset (if it exists) if the underlying dataset is of that type.
+
+        dispatch_batches (`bool`, *optional*):
+            If set to `True`, the dataloader prepared is only iterated through on the main process and then the batches
+            are split and broadcast to each process. Will default to `True` when the underlying dataset is an
+            `IterableDataset`, `False` otherwise.
+
+    Returns:
+        `torch.utils.data.dataloader.DataLoader`: A new data loader that will yield the portion of the batches for the
+        current process.
+
+
+    This does not support `BatchSampler` with varying batch size yet.
+
+    """
+    if dispatch_batches is None:
+        if is_torch_version("<", "1.8.0") or not put_on_device:
+            dispatch_batches = False
+        else:
+            dispatch_batches = isinstance(dataloader.dataset, IterableDataset)
+
+    if dispatch_batches and not put_on_device:
+        raise ValueError("Using `dispatch_batches=True` requires `put_on_device=True`.")
+    # Grab defaults from AcceleratorState
+    state = AcceleratorState()
+    if num_processes is None:
+        num_processes = state.num_processes
+    if process_index is None:
+        process_index = state.process_index
+
+    # Sanity check
+    if split_batches and dataloader.batch_size > 1 and dataloader.batch_size % num_processes != 0:
+        raise ValueError(
+            f"To use a `DataLoader` in `split_batches` mode, the batch size ({dataloader.batch_size}) "
+            f"needs to be a round multiple of the number of processes ({num_processes})."
+        )
+
+    new_dataset = dataloader.dataset
+    # Iterable dataset doesn't like batch_sampler, but data_loader creates a default one for it
+    new_batch_sampler = dataloader.batch_sampler if not isinstance(new_dataset, IterableDataset) else None
+    generator = getattr(dataloader, "generator", None)
+    # No change if no multiprocess
+    if num_processes != 1 and not dispatch_batches:
+        if isinstance(new_dataset, IterableDataset):
+            if getattr(dataloader.dataset, "generator", None) is not None:
+                generator = dataloader.dataset.generator
+            new_dataset = IterableDatasetShard(
+                new_dataset,
+                batch_size=dataloader.batch_size,
+                drop_last=dataloader.drop_last,
+                num_processes=num_processes,
+                process_index=process_index,
+                split_batches=split_batches,
+            )
+        else:
+            # New batch sampler for the current process.
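+            # A fresh seed is drawn locally below; the generator is then kept identical across processes because
+            # "generator" is part of `rng_types` and re-synchronized at the start of every iteration (see
+            # `DataLoaderShard.__iter__` above), so shuffling stays consistent between shards.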
+            if hasattr(dataloader.sampler, "generator"):
+                if dataloader.sampler.generator is None:
+                    dataloader.sampler.generator = torch.Generator()
+                generator = dataloader.sampler.generator
+                generator.manual_seed(int(torch.empty((), dtype=torch.int64).random_().item()))
+            elif getattr(dataloader.batch_sampler, "generator", None) is not None:
+                generator = dataloader.batch_sampler.generator
+            new_batch_sampler = BatchSamplerShard(
+                dataloader.batch_sampler,
+                num_processes=num_processes,
+                process_index=process_index,
+                split_batches=split_batches,
+            )
+
+    # We ignore all of those since they are all dealt with by our new_batch_sampler
+    ignore_kwargs = [
+        "batch_size",
+        "shuffle",
+        "sampler",
+        "batch_sampler",
+        "drop_last",
+        "generator",
+    ]
+
+    if rng_types is not None and generator is None and "generator" in rng_types:
+        rng_types.remove("generator")
+
+    kwargs = {
+        k: getattr(dataloader, k, _PYTORCH_DATALOADER_KWARGS[k])
+        for k in _PYTORCH_DATALOADER_KWARGS
+        if k not in ignore_kwargs
+    }
+
+    # Need to provide batch_size as batch_sampler is None for an iterable dataset
+    if new_batch_sampler is None:
+        kwargs["drop_last"] = dataloader.drop_last
+        kwargs["batch_size"] = dataloader.batch_size // num_processes if split_batches else dataloader.batch_size
+
+    if dispatch_batches:
+        dataloader = DataLoaderDispatcher(
+            new_dataset,
+            split_batches=split_batches,
+            batch_sampler=new_batch_sampler,
+            **kwargs,
+        )
+    else:
+        dataloader = DataLoaderShard(
+            new_dataset,
+            device=device if put_on_device and state.distributed_type != DistributedType.TPU else None,
+            batch_sampler=new_batch_sampler,
+            rng_types=rng_types,
+            generator=generator,
+            **kwargs,
+        )
+
+    if state.distributed_type == DistributedType.TPU:
+        return MpDeviceLoaderWrapper(dataloader, device)
+    return dataloader
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/hooks.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c04b86213110e2dfefae96765765696a03ebd80
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/hooks.py
@@ -0,0 +1,480 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+from typing import Dict, List, Mapping, Optional, Union
+
+import torch
+import torch.nn as nn
+
+from .utils import PrefixedDataset, find_device, named_module_tensors, send_to_device, set_module_tensor_to_device
+
+
+class ModelHook:
+    """
+    A hook that contains callbacks to be executed just before and after the forward method of a model. The difference
+    from existing PyTorch hooks is that they get passed along the kwargs.
+
+    Class attribute:
+    - **no_grad** (`bool`, *optional*, defaults to `False`) -- Whether or not to execute the actual forward pass under
+      the `torch.no_grad()` context manager.
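+
+    Example (a minimal custom hook; `LoggingHook` is illustrative only and not part of the library):
+
+    ```python
+    class LoggingHook(ModelHook):
+        def pre_forward(self, module, *args, **kwargs):
+            print(f"Entering {module.__class__.__name__}")
+            return args, kwargs
+
+        def post_forward(self, module, output):
+            print(f"Leaving {module.__class__.__name__}")
+            return output
+    ```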
+    """
+
+    no_grad = False
+
+    def init_hook(self, module):
+        """
+        To be executed when the hook is attached to the module.
+
+        Args:
+            module (`torch.nn.Module`): The module attached to this hook.
+        """
+        return module
+
+    def pre_forward(self, module, *args, **kwargs):
+        """
+        To be executed just before the forward method of the model.
+
+        Args:
+            module (`torch.nn.Module`): The module whose forward pass will be executed just after this event.
+            args (`Tuple[Any]`): The positional arguments passed to the module.
+            kwargs (`Dict[Str, Any]`): The keyword arguments passed to the module.
+
+        Returns:
+            `Tuple[Tuple[Any], Dict[Str, Any]]`: A tuple with the treated `args` and `kwargs`.
+        """
+        return args, kwargs
+
+    def post_forward(self, module, output):
+        """
+        To be executed just after the forward method of the model.
+
+        Args:
+            module (`torch.nn.Module`): The module whose forward pass has been executed just before this event.
+            output (`Any`): The output of the module.
+
+        Returns:
+            `Any`: The processed `output`.
+        """
+        return output
+
+    def detach_hook(self, module):
+        """
+        To be executed when the hook is detached from a module.
+
+        Args:
+            module (`torch.nn.Module`): The module detached from this hook.
+        """
+        return module
+
+
+class SequentialHook(ModelHook):
+    """
+    A hook that can contain several hooks and iterates through them at each event.
+    """
+
+    def __init__(self, *hooks):
+        self.hooks = hooks
+
+    def init_hook(self, module):
+        for hook in self.hooks:
+            module = hook.init_hook(module)
+        return module
+
+    def pre_forward(self, module, *args, **kwargs):
+        for hook in self.hooks:
+            args, kwargs = hook.pre_forward(module, *args, **kwargs)
+        return args, kwargs
+
+    def post_forward(self, module, output):
+        for hook in self.hooks:
+            output = hook.post_forward(module, output)
+        return output
+
+    def detach_hook(self, module):
+        for hook in self.hooks:
+            module = hook.detach_hook(module)
+        return module
+
+
+def add_hook_to_module(module: nn.Module, hook: ModelHook):
+    """
+    Adds a hook to a given module. This will rewrite the `forward` method of the module to include the hook; to remove
+    this behavior and restore the original `forward` method, use `remove_hook_from_module`.
+
+
+
+    If the module already contains a hook, this will replace it with the new hook passed. To chain two hooks together,
+    use the `SequentialHook` class.
+
+
+
+    Args:
+        module (`torch.nn.Module`): The module to attach a hook to.
+        hook (`ModelHook`): The hook to attach.
+
+    Returns:
+        `torch.nn.Module`: The same module, with the hook attached (the module is modified in place, so the result can
+        be discarded).
+    """
+    if hasattr(module, "_hf_hook") and hasattr(module, "_old_forward"):
+        # If we already put some hook on this module, we replace it with the new one.
+        old_forward = module._old_forward
+    else:
+        old_forward = module.forward
+        module._old_forward = old_forward
+
+    module = hook.init_hook(module)
+    module._hf_hook = hook
+
+    @functools.wraps(old_forward)
+    def new_forward(*args, **kwargs):
+        args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
+        if module._hf_hook.no_grad:
+            with torch.no_grad():
+                output = old_forward(*args, **kwargs)
+        else:
+            output = old_forward(*args, **kwargs)
+        return module._hf_hook.post_forward(module, output)
+
+    module.forward = new_forward
+    return module
+
+
+def remove_hook_from_module(module: nn.Module):
+    """
+    Removes any hook attached to a module via `add_hook_to_module`.
+
+    Args:
+        module (`torch.nn.Module`): The module to remove the hook from.
+
+    Returns:
+        `torch.nn.Module`: The same module, with the hook detached (the module is modified in place, so the result can
+        be discarded).
+    """
+    if hasattr(module, "_hf_hook"):
+        module._hf_hook.detach_hook(module)
+        delattr(module, "_hf_hook")
+
+    if hasattr(module, "_old_forward"):
+        module.forward = module._old_forward
+        delattr(module, "_old_forward")
+
+    return module
+
+
+class AlignDevicesHook(ModelHook):
+    """
+    A generic `ModelHook` that ensures inputs and model weights are on the same device for the forward pass of the
+    associated module, potentially offloading the weights after the forward pass.
+
+    Args:
+        execution_device (`torch.device`, *optional*):
+            The device on which inputs and model weights should be placed before the forward pass.
+        offload (`bool`, *optional*, defaults to `False`):
+            Whether or not the weights should be offloaded after the forward pass.
+        io_same_device (`bool`, *optional*, defaults to `False`):
+            Whether or not the output should be placed on the same device as the input was.
+        weights_map (`Mapping[str, torch.Tensor]`, *optional*):
+            When the model weights are offloaded, a (potentially lazy) map from param names to the tensor values.
+        offload_buffers (`bool`, *optional*, defaults to `False`):
+            Whether or not to include the associated module's buffers when offloading.
+        place_submodules (`bool`, *optional*, defaults to `False`):
+            Whether to place the submodules on `execution_device` during the `init_hook` event.
+    """
+
+    def __init__(
+        self,
+        execution_device: Optional[Union[int, str, torch.device]] = None,
+        offload: bool = False,
+        io_same_device: bool = False,
+        weights_map: Optional[Mapping] = None,
+        offload_buffers: bool = False,
+        place_submodules: bool = False,
+    ):
+        self.execution_device = execution_device
+        self.offload = offload
+        self.io_same_device = io_same_device
+        self.weights_map = weights_map
+        self.offload_buffers = offload_buffers
+        self.place_submodules = place_submodules
+
+        # Will contain the input device when `io_same_device=True`.
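+        # `post_forward` sends the output back to that device, so callers see I/O on the device they used.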
+        self.input_device = None
+        self.param_original_devices = {}
+        self.buffer_original_devices = {}
+
+    def init_hook(self, module):
+        if not self.offload and self.execution_device is not None:
+            for name, _ in named_module_tensors(module, recurse=self.place_submodules):
+                set_module_tensor_to_device(module, name, self.execution_device)
+        elif self.offload:
+            self.original_devices = {
+                name: param.device for name, param in named_module_tensors(module, recurse=self.place_submodules)
+            }
+            if self.weights_map is None:
+                self.weights_map = {
+                    name: param.to("cpu")
+                    for name, param in named_module_tensors(
+                        module, include_buffers=self.offload_buffers, recurse=self.place_submodules
+                    )
+                }
+
+            for name, _ in named_module_tensors(
+                module, include_buffers=self.offload_buffers, recurse=self.place_submodules
+            ):
+                set_module_tensor_to_device(module, name, "meta")
+            if not self.offload_buffers and self.execution_device is not None:
+                for name, _ in module.named_buffers(recurse=self.place_submodules):
+                    set_module_tensor_to_device(module, name, self.execution_device)
+        return module
+
+    def pre_forward(self, module, *args, **kwargs):
+        if self.io_same_device:
+            self.input_device = find_device([args, kwargs])
+        if self.offload:
+            for name, _ in named_module_tensors(
+                module, include_buffers=self.offload_buffers, recurse=self.place_submodules
+            ):
+                set_module_tensor_to_device(module, name, self.execution_device, value=self.weights_map[name])
+
+        return send_to_device(args, self.execution_device), send_to_device(kwargs, self.execution_device)
+
+    def post_forward(self, module, output):
+        if self.offload:
+            for name, _ in named_module_tensors(
+                module, include_buffers=self.offload_buffers, recurse=self.place_submodules
+            ):
+                set_module_tensor_to_device(module, name, "meta")
+
+        if self.io_same_device and self.input_device is not None:
+            output = send_to_device(output, self.input_device)
+
+        return output
+
+    def detach_hook(self, module):
+        if self.offload:
+            for name, device in self.original_devices.items():
+                if device != torch.device("meta"):
+                    set_module_tensor_to_device(module, name, device, value=self.weights_map.get(name, None))
+        return module
+
+
+def attach_execution_device_hook(
+    module: torch.nn.Module,
+    execution_device: Union[int, str, torch.device],
+    preload_module_classes: Optional[List[str]] = None,
+):
+    """
+    Recursively attaches `AlignDevicesHook` to all submodules of a given model to make sure they have the right
+    execution device.
+
+    Args:
+        module (`torch.nn.Module`):
+            The module where we want to attach the hooks.
+        execution_device (`int`, `str` or `torch.device`):
+            The device on which inputs and model weights should be placed before the forward pass.
+        preload_module_classes (`List[str]`, *optional*):
+            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
+            of the forward. This should only be used for classes that have submodules which are registered but not
+            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
+            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
+    """
+    if not hasattr(module, "_hf_hook") and len(module.state_dict()) > 0:
+        add_hook_to_module(module, AlignDevicesHook(execution_device))
+
+    # Break the recursion if we get to a preload module.
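+    # (Preload classes get their weights handled as a single unit by `attach_align_device_hook`, so their children
+    # don't need hooks of their own.)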
+    if preload_module_classes is not None and module.__class__.__name__ in preload_module_classes:
+        return
+
+    for child in module.children():
+        attach_execution_device_hook(child, execution_device, preload_module_classes=preload_module_classes)
+
+
+def attach_align_device_hook(
+    module: torch.nn.Module,
+    execution_device: Optional[torch.device] = None,
+    offload: bool = False,
+    weights_map: Optional[Mapping] = None,
+    offload_buffers: bool = False,
+    module_name: str = "",
+    preload_module_classes: Optional[List[str]] = None,
+):
+    """
+    Recursively attaches `AlignDevicesHook` to all submodules of a given model that have direct parameters and/or
+    buffers.
+
+    Args:
+        module (`torch.nn.Module`):
+            The module where we want to attach the hooks.
+        execution_device (`torch.device`, *optional*):
+            The device on which inputs and model weights should be placed before the forward pass.
+        offload (`bool`, *optional*, defaults to `False`):
+            Whether or not the weights should be offloaded after the forward pass.
+        weights_map (`Mapping[str, torch.Tensor]`, *optional*):
+            When the model weights are offloaded, a (potentially lazy) map from param names to the tensor values.
+        offload_buffers (`bool`, *optional*, defaults to `False`):
+            Whether or not to include the associated module's buffers when offloading.
+        module_name (`str`, *optional*, defaults to `""`):
+            The name of the module.
+        preload_module_classes (`List[str]`, *optional*):
+            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
+            of the forward. This should only be used for classes that have submodules which are registered but not
+            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
+            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
+    """
+    # Attach the hook on this module if it has any direct tensor.
+    directs = named_module_tensors(module)
+    full_offload = (
+        offload and preload_module_classes is not None and module.__class__.__name__ in preload_module_classes
+    )
+
+    if len(list(directs)) > 0 or full_offload:
+        if weights_map is not None:
+            prefix = f"{module_name}." if len(module_name) > 0 else ""
+            prefixed_weights_map = PrefixedDataset(weights_map, prefix)
+        else:
+            prefixed_weights_map = None
+        hook = AlignDevicesHook(
+            execution_device=execution_device,
+            offload=offload,
+            weights_map=prefixed_weights_map,
+            offload_buffers=offload_buffers,
+            place_submodules=full_offload,
+        )
+        add_hook_to_module(module, hook)
+
+    # We stop the recursion in case we hit the full offload.
+    if full_offload:
+        return
+
+    # Recurse on all children of the module.
+    for child_name, child in module.named_children():
+        child_name = f"{module_name}.{child_name}" if len(module_name) > 0 else child_name
+        attach_align_device_hook(
+            child,
+            execution_device=execution_device,
+            offload=offload,
+            weights_map=weights_map,
+            offload_buffers=offload_buffers,
+            module_name=child_name,
+            preload_module_classes=preload_module_classes,
+        )
+
+
+def remove_hook_from_submodules(module: nn.Module):
+    """
+    Recursively removes all hooks attached on the submodules of a given model.
+
+    Args:
+        module (`torch.nn.Module`): The module on which to remove all hooks.
+    """
+    remove_hook_from_module(module)
+    for child in module.children():
+        remove_hook_from_submodules(child)
+
+
+def attach_align_device_hook_on_blocks(
+    module: nn.Module,
+    execution_device: Optional[Union[torch.device, Dict[str, torch.device]]] = None,
+    offload: Union[bool, Dict[str, bool]] = False,
+    weights_map: Mapping = None,
+    offload_buffers: bool = False,
+    module_name: str = "",
+    preload_module_classes: Optional[List[str]] = None,
+):
+    """
+    Attaches `AlignDevicesHook` to all blocks of a given model as needed.
+
+    Args:
+        module (`torch.nn.Module`):
+            The module where we want to attach the hooks.
+        execution_device (`torch.device` or `Dict[str, torch.device]`, *optional*):
+            The device on which inputs and model weights should be placed before the forward pass. It can be one device
+            for the whole module, or a dictionary mapping module name to device.
+        offload (`bool`, *optional*, defaults to `False`):
+            Whether or not the weights should be offloaded after the forward pass. It can be one boolean for the whole
+            module, or a dictionary mapping module name to boolean.
+        weights_map (`Mapping[str, torch.Tensor]`, *optional*):
+            When the model weights are offloaded, a (potentially lazy) map from param names to the tensor values.
+        offload_buffers (`bool`, *optional*, defaults to `False`):
+            Whether or not to include the associated module's buffers when offloading.
+        module_name (`str`, *optional*, defaults to `""`):
+            The name of the module.
+        preload_module_classes (`List[str]`, *optional*):
+            A list of classes whose instances should load all their weights (even in the submodules) at the beginning
+            of the forward. This should only be used for classes that have submodules which are registered but not
+            called directly during the forward, for instance if a `dense` linear layer is registered, but at forward,
+            `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly.
+    """
+    # If one device and one offload, we've got one hook.
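+    # (A single execution device with a single offload flag needs at most one hook on the root module; dictionary
+    # arguments fall through to the per-block logic below, which walks the children with one entry per module name.)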
+ if not isinstance(execution_device, Mapping) and not isinstance(offload, dict): + if not offload: + hook = AlignDevicesHook(execution_device=execution_device, io_same_device=True, place_submodules=True) + add_hook_to_module(module, hook) + else: + attach_align_device_hook( + module, + execution_device=execution_device, + offload=True, + weights_map=weights_map, + offload_buffers=offload_buffers, + module_name=module_name, + ) + return + + if not isinstance(execution_device, Mapping): + execution_device = {key: execution_device for key in offload.keys()} + if not isinstance(offload, Mapping): + offload = {key: offload for key in execution_device.keys()} + + if module_name in execution_device and not offload[module_name]: + hook = AlignDevicesHook( + execution_device=execution_device[module_name], + offload_buffers=offload_buffers, + io_same_device=(module_name == ""), + place_submodules=True, + ) + add_hook_to_module(module, hook) + attach_execution_device_hook(module, execution_device[module_name]) + elif module_name in execution_device: + attach_align_device_hook( + module, + execution_device=execution_device[module_name], + offload=True, + weights_map=weights_map, + offload_buffers=offload_buffers, + module_name=module_name, + preload_module_classes=preload_module_classes, + ) + if not hasattr(module, "_hf_hook"): + hook = AlignDevicesHook(execution_device=execution_device[module_name], io_same_device=(module_name == "")) + add_hook_to_module(module, hook) + attach_execution_device_hook( + module, execution_device[module_name], preload_module_classes=preload_module_classes + ) + elif module_name == "": + hook = AlignDevicesHook(io_same_device=True) + add_hook_to_module(module, hook) + + for child_name, child in module.named_children(): + child_name = f"{module_name}.{child_name}" if len(module_name) > 0 else child_name + attach_align_device_hook_on_blocks( + child, + execution_device=execution_device, + offload=offload, + weights_map=weights_map, + offload_buffers=offload_buffers, + module_name=child_name, + preload_module_classes=preload_module_classes, + ) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/launchers.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/launchers.py new file mode 100644 index 0000000000000000000000000000000000000000..8ddc9af346c269c7c30b83598333153868fe599b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/launchers.py @@ -0,0 +1,177 @@ +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import tempfile +import warnings + +import torch + +from .state import AcceleratorState +from .utils import PrecisionType, PrepareForLaunch, is_torch_version, patch_environment + + +def notebook_launcher(function, args=(), num_processes=None, use_fp16=False, mixed_precision="no", use_port="29500"): + """ + Launches a training function, using several processes if it's possible in the current environment (TPU with + multiple cores for instance). + + Args: + function (`Callable`): + The training function to execute. If it accepts arguments, the first argument should be the index of the + process run. + args (`Tuple`): + Tuple of arguments to pass to the function (it will receive `*args`). + num_processes (`int`, *optional*): + The number of processes to use for training. Will default to 8 in Colab/Kaggle if a TPU is available, to + the number of GPUs available otherwise. + use_fp16 (`bool`, *optional*, defaults to `False`): + Deprecated; use `mixed_precision="fp16"` instead. + mixed_precision (`str`, *optional*, defaults to `"no"`): + If `fp16` or `bf16`, will use mixed precision training on multi-GPU. + use_port (`str`, *optional*, defaults to `"29500"`): + The port to use to communicate between processes when launching a multi-GPU training. + """ + # Are we in a google colab or a Kaggle Kernel? + if any(key.startswith("KAGGLE") for key in os.environ.keys()): + in_colab_or_kaggle = True + elif "IPython" in sys.modules: + in_colab_or_kaggle = "google.colab" in str(sys.modules["IPython"].get_ipython()) + else: + in_colab_or_kaggle = False + + try: + mixed_precision = PrecisionType(mixed_precision.lower()) + except ValueError: + raise ValueError( + f"Unknown mixed_precision mode: {mixed_precision.lower()}. Choose between {PrecisionType.list()}." + ) + + if in_colab_or_kaggle: + if os.environ.get("TPU_NAME", None) is not None: + # TPU launch + import torch_xla.distributed.xla_multiprocessing as xmp + + if len(AcceleratorState._shared_state) > 0: + raise ValueError( + "To train on TPU in Colab or Kaggle Kernel, the `Accelerator` should only be initialized inside " + "your training function. Restart your notebook and make sure no cell initializes an " + "`Accelerator`." + ) + if num_processes is None: + num_processes = 8 + + launcher = PrepareForLaunch(function, distributed_type="TPU") + print(f"Launching a training on {num_processes} TPU cores.") + xmp.spawn(launcher, args=args, nprocs=num_processes, start_method="fork") + else: + # No need for a distributed launch otherwise as it's either CPU or one GPU. + if torch.cuda.is_available(): + print("Launching training on one GPU.") + else: + print("Launching training on one CPU.") + function(*args) + + else: + if num_processes is None: + raise ValueError( + "You have to specify the number of GPUs you would like to use, add `num_processes=...` to your call." + ) + + if num_processes > 1: + # Multi-GPU launch + if is_torch_version("<", "1.5.0"): + raise ImportError( + "Using `notebook_launcher` for distributed training on GPUs requires torch >= 1.5.0, got " + f"{torch.__version__}." + ) + + from torch.multiprocessing import start_processes + + if len(AcceleratorState._shared_state) > 0: + raise ValueError( + "To launch a multi-GPU training from your notebook, the `Accelerator` should only be initialized " + "inside your training function. Restart your notebook and make sure no cell initializes an " + "`Accelerator`." + ) + + if torch.cuda.is_initialized(): + raise ValueError( + "To launch a multi-GPU training from your notebook, you need to avoid running any instruction " + "using `torch.cuda` in any cell. 
Restart your notebook and make sure no cells use any CUDA " + "function." + ) + + if use_fp16: + warnings.warn('use_fp16=True is deprecated. Use mixed_precision="fp16" instead.', DeprecationWarning) + mixed_precision = "fp16" + + # torch.distributed will expect a few environment variables to be set. We set the ones common to each + # process here (the others will be set by the launcher). + with patch_environment( + world_size=num_processes, master_addr="127.0.0.1", master_port=use_port, mixed_precision=mixed_precision + ): + launcher = PrepareForLaunch(function, distributed_type="MULTI_GPU") + + print(f"Launching training on {num_processes} GPUs.") + start_processes(launcher, args=args, nprocs=num_processes, start_method="fork") + + else: + # No need for a distributed launch otherwise as it's either CPU or one GPU. + if torch.cuda.is_available(): + print("Launching training on one GPU.") + else: + print("Launching training on CPU.") + function(*args) + + +def debug_launcher(function, args=(), num_processes=2): + """ + Launches a training function using several processes on CPU for debugging purposes. + + This function is provided for internal testing and debugging, but it's not intended for real training runs. It + will only use the CPU. + + Args: + function (`Callable`): + The training function to execute. + args (`Tuple`): + Tuple of arguments to pass to the function (it will receive `*args`). + num_processes (`int`, *optional*, defaults to 2): + The number of processes to use for training. + """ + if is_torch_version("<", "1.5.0"): + raise ImportError( + "Using `debug_launcher` for distributed training requires torch >= 1.5.0, got " + f"{torch.__version__}." + ) + + from torch.multiprocessing import start_processes + + with tempfile.NamedTemporaryFile() as tmp_file: + # torch.distributed will expect a few environment variables to be set. We set the ones common to each + # process here (the others will be set by the launcher). + with patch_environment( + world_size=num_processes, + master_addr="127.0.0.1", + master_port="29500", + mixed_precision="no", + accelerate_debug_rdv_file=tmp_file.name, + use_cpu="yes", + ): + launcher = PrepareForLaunch(function, debug=True) + start_processes(launcher, args=args, nprocs=num_processes, start_method="fork") diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/logging.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..2128cdca7bf9c4069a25cfb61db238b51d69f284 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/logging.py @@ -0,0 +1,63 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from .state import AcceleratorState + + +class MultiProcessAdapter(logging.LoggerAdapter): + """ + An adapter to assist with logging in multiprocess environments.
+ + `log` takes in an additional `main_process_only` kwarg, which dictates whether the log should be emitted on all + processes or only on the main process. Default is `main_process_only=True`. + """ + + @staticmethod + def _should_log(main_process_only): + "Check if log should be performed" + return not main_process_only or (main_process_only and AcceleratorState().local_process_index == 0) + + def log(self, level, msg, *args, **kwargs): + """ + Delegates logger call after checking if we should log. + + Accepts a new kwarg of `main_process_only`, which dictates whether the message will be logged on all processes + or only on the main process. Defaults to `True` if not passed. + """ + main_process_only = kwargs.pop("main_process_only", True) + if self.isEnabledFor(level) and self._should_log(main_process_only): + msg, kwargs = self.process(msg, kwargs) + self.logger.log(level, msg, *args, **kwargs) + + +def get_logger(name: str): + """ + Returns a `logging.Logger` for `name` that can handle multiprocessing. + + If a log should be emitted on all processes, pass `main_process_only=False` + + E.g. + ```python + logger.info("My log", main_process_only=False) + logger.debug("My log", main_process_only=False) + ``` + + Args: + name (`str`): + The name for the logger, such as `__file__` + """ + logger = logging.getLogger(name) + return MultiProcessAdapter(logger, {}) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/memory_utils.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/memory_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..eba10bb783bf0158b2126cbfbe28cf8c40db34ed --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/memory_utils.py @@ -0,0 +1,29 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all + + +import warnings + + +warnings.warn( + "memory_utils has been reorganized to utils.memory. Import `find_executable_batch_size` from the main `__init__`: " + "`from accelerate import find_executable_batch_size` to avoid this warning.", + FutureWarning, +) + +from .utils.memory import find_executable_batch_size diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/optimizer.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..4fad12c724e19ebb48e8bbb04362a967f5edc5cd --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/optimizer.py @@ -0,0 +1,159 @@ +# Copyright 2021 The HuggingFace Team. All rights reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import warnings + +import torch + +from .state import AcceleratorState, GradientState +from .utils import DistributedType, honor_type, is_torch_version, is_tpu_available + + +if is_tpu_available(check_device=False): + import torch_xla.core.xla_model as xm + + +def move_to_device(state, device): + if isinstance(state, (list, tuple)): + return honor_type(state, (move_to_device(t, device) for t in state)) + elif isinstance(state, dict): + return type(state)({k: move_to_device(v, device) for k, v in state.items()}) + elif isinstance(state, torch.Tensor): + return state.to(device) + return state + + +class AcceleratedOptimizer(torch.optim.Optimizer): + """ + Internal wrapper around a torch optimizer. + + Conditionally will perform `step` and `zero_grad` if gradients should be synchronized when performing gradient + accumulation. + + Args: + optimizer (`torch.optim.optimizer.Optimizer`): + The optimizer to wrap. + device_placement (`bool`, *optional*, defaults to `True`): + Whether or not the optimizer should handle device placement. If so, it will place the state dictionary of + `optimizer` on the right device. + scaler (`torch.cuda.amp.grad_scaler.GradScaler`, *optional*): + The scaler to use in the step function if training with mixed precision. 
+ """ + + def __init__(self, optimizer, device_placement=True, scaler=None): + self.optimizer = optimizer + self.scaler = scaler + self.accelerator_state = AcceleratorState() + self.gradient_state = GradientState() + self.device_placement = device_placement + self._is_overflow = False + + # Handle device placement + if device_placement: + state_dict = self.optimizer.state_dict() + if self.accelerator_state.distributed_type == DistributedType.TPU: + xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device) + else: + state_dict = move_to_device(state_dict, self.accelerator_state.device) + self.optimizer.load_state_dict(state_dict) + + @property + def state(self): + return self.optimizer.state + + @state.setter + def state(self, state): + self.optimizer.state = state + + @property + def param_groups(self): + return self.optimizer.param_groups + + @param_groups.setter + def param_groups(self, param_groups): + self.optimizer.param_groups = param_groups + + @property + def defaults(self): + return self.optimizer.defaults + + @defaults.setter + def defaults(self, defaults): + self.optimizer.defaults = defaults + + def add_param_group(self, param_group): + self.optimizer.add_param_group(param_group) + + def load_state_dict(self, state_dict): + if self.accelerator_state.distributed_type == DistributedType.TPU and self.device_placement: + xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device) + self.optimizer.load_state_dict(state_dict) + + def state_dict(self): + return self.optimizer.state_dict() + + def zero_grad(self, set_to_none=None): + if self.gradient_state.sync_gradients: + if is_torch_version("<", "1.7.0"): + if set_to_none is not None: + raise ValueError( + "`set_to_none` for Optimizer.zero_grad` was introduced in PyTorch 1.7.0 and can't be used for " + f"earlier versions (found version {torch.__version__})." + ) + self.optimizer.zero_grad() + else: + accept_arg = "set_to_none" in inspect.signature(self.optimizer.zero_grad).parameters + if accept_arg: + if set_to_none is None: + set_to_none = False + self.optimizer.zero_grad(set_to_none=set_to_none) + else: + if set_to_none is not None: + raise ValueError("`set_to_none` for Optimizer.zero_grad` is not supported by this optimizer.") + self.optimizer.zero_grad() + + def step(self, closure=None): + if self.gradient_state.sync_gradients: + if self.accelerator_state.distributed_type == DistributedType.TPU: + optimizer_args = {"closure": closure} if closure is not None else {} + xm.optimizer_step(self.optimizer, optimizer_args=optimizer_args) + elif self.scaler is not None: + scale_before = self.scaler.get_scale() + self.scaler.step(self.optimizer, closure) + self.scaler.update() + scale_after = self.scaler.get_scale() + # If we reduced the loss scale, it means the optimizer step was skipped because of gradient overflow. 
+ self._is_overflow = scale_after < scale_before + else: + self.optimizer.step(closure) + + def _switch_parameters(self, parameters_map): + for param_group in self.optimizer.param_groups: + param_group["params"] = [parameters_map.get(p, p) for p in param_group["params"]] + + @property + def is_overflow(self): + """Whether or not the last optimizer step was skipped because of gradient overflow.""" + warnings.warn( + "The `is_overflow` property is deprecated and will be removed in version 1.0 of Accelerate. Use " + "`optimizer.step_was_skipped` instead.", + FutureWarning, + ) + return self._is_overflow + + @property + def step_was_skipped(self): + """Whether or not the optimizer step was skipped.""" + return self._is_overflow diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/scheduler.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..75695091e98db3a044d5a20e52149795af82d183 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/scheduler.py @@ -0,0 +1,89 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# We ignore warnings about stepping the scheduler since we step it ourselves during gradient accumulation + +import warnings + +from .state import AcceleratorState + + +warnings.filterwarnings("ignore", category=UserWarning, module="torch.optim.lr_scheduler") + + +class AcceleratedScheduler: + """ + A wrapper around a learning rate scheduler that will only step when the optimizer(s) have a training step. Useful + to avoid stepping the scheduler too fast when gradients overflowed and there was no training step (in mixed + precision training). + + When performing gradient accumulation, scheduler lengths should not be changed accordingly; Accelerate will always + step the scheduler to account for it. + + Args: + scheduler (`torch.optim.lr_scheduler._LRScheduler`): + The scheduler to wrap. + optimizers (one or a list of `torch.optim.Optimizer`): + The optimizers used. + step_with_optimizer (`bool`, *optional*, defaults to `True`): + Whether or not the scheduler should be stepped at each optimizer step. + split_batches (`bool`, *optional*, defaults to `False`): + Whether or not the dataloaders split one batch across the different processes (so batch size is the same + regardless of the number of processes) or create batches on each process (so batch size is the original + batch size multiplied by the number of processes).
+ """ + + def __init__(self, scheduler, optimizers, step_with_optimizer: bool = True, split_batches: bool = False): + self.scheduler = scheduler + self.optimizers = optimizers if isinstance(optimizers, (list, tuple)) else [optimizers] + self.split_batches = split_batches + self.step_with_optimizer = step_with_optimizer + + def step(self, *args, **kwargs): + if not self.step_with_optimizer: + # No link between scheduler and optimizer -> just step + self.scheduler.step(*args, **kwargs) + return + + # Otherwise, first make sure the optimizer was stepped. + for opt in self.optimizers: + if opt.step_was_skipped: + return + if self.split_batches: + # Split batches -> the training dataloader batch size is not changed so one step per training step + self.scheduler.step(*args, **kwargs) + else: + # Otherwise the training dataloader batch size was multiplied by `num_processes`, so we need to do + # num_processes steps per training step + num_processes = AcceleratorState().num_processes + for _ in range(num_processes): + # Special case when using OneCycle and `drop_last` was not used + if getattr(self.scheduler, "total_steps", 0) <= self.scheduler.last_epoch: + self.scheduler.step(*args, **kwargs) + + # Passthroughs + def get_last_lr(self): + return self.scheduler.get_last_lr() + + def state_dict(self): + return self.scheduler.state_dict() + + def load_state_dict(self, state_dict): + self.scheduler.load_state_dict(state_dict) + + def get_lr(self): + return self.scheduler.get_lr() + + def print_lr(self, *args, **kwargs): + return self.scheduler.print_lr(*args, **kwargs) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/state.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/state.py new file mode 100644 index 0000000000000000000000000000000000000000..38cd40bc7c1c79942fe5905089c9414d9c97e641 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/state.py @@ -0,0 +1,262 @@ +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from distutils.util import strtobool + +import torch + +from .utils import DistributedType, is_ccl_available, is_deepspeed_available, is_tpu_available +from .utils.dataclasses import SageMakerDistributedType + + +if is_tpu_available(check_device=False): + import torch_xla.core.xla_model as xm + + +def get_int_from_env(env_keys, default): + """Returns the first positive env value found in the `env_keys` list or the default.""" + for e in env_keys: + val = int(os.environ.get(e, -1)) + if val >= 0: + return val + return default + + +def parse_flag_from_env(key, default=False): + value = os.environ.get(key, str(default)) + return strtobool(value) == 1 # As its name indicates `strtobool` actually returns an int... + + +def parse_choice_from_env(key, default="no"): + value = os.environ.get(key, str(default)) + return value + + +# Inspired by Alex Martelli's 'Borg'. 
+class AcceleratorState: + """ + This is a variation of a [singleton class](https://en.wikipedia.org/wiki/Singleton_pattern) in the sense that all + instances of `AcceleratorState` share the same state, which is initialized on the first instantiation. + + Attributes: + + - **device** (`torch.device`) -- The device to use. + - **sync_gradients** (`bool`) -- Whether to sync the gradients or not + - **distributed_type** (`~accelerate.state.DistributedType`) -- The type of distributed environment currently + in use. + - **num_processes** (`int`) -- The number of processes currently launched in parallel. + - **process_index** (`int`) -- The index of the current process. + - **local_process_index** (`int`) -- The index of the current process on the current server. + - **mixed_precision** (`str`) -- Whether or not the current script will use mixed precision, and if so whether + FP16 or BF16 (bfloat16) is used as the floating point format. + """ + + _shared_state = {} + + def __init__( + self, + mixed_precision: str = None, + cpu: bool = False, + deepspeed_plugin=None, + fsdp_plugin=None, + _from_accelerator: bool = False, + **kwargs, + ): + self.__dict__ = self._shared_state + if parse_flag_from_env("USE_CPU"): + cpu = True + self._check_initialized(mixed_precision, cpu) + self.fork_launched = parse_flag_from_env("FORK_LAUNCHED", 0) + if not getattr(self, "initialized", False): + self.backend = None + self.deepspeed_plugin = None + mixed_precision = ( + parse_choice_from_env("MIXED_PRECISION", "no") if mixed_precision is None else mixed_precision.lower() + ) + if not _from_accelerator: + raise ValueError( + "Please make sure to properly initialize your accelerator via `accelerator = Accelerator()` " + "before using any functionality from the `accelerate` library."
+ ) + if ( + os.environ.get("USE_SAGEMAKER", "false") == "true" + and os.environ.get("SAGEMAKER_DISTRIBUTED_TYPE") != SageMakerDistributedType.NO + and not cpu + ): + if os.environ.get("SAGEMAKER_DISTRIBUTED_TYPE") == SageMakerDistributedType.DATA_PARALLEL: + self.distributed_type = DistributedType.MULTI_GPU + import smdistributed.dataparallel.torch.torch_smddp # noqa + + if not torch.distributed.is_initialized(): + torch.distributed.init_process_group(backend="smddp") + self.backend = "smddp" + self.num_processes = torch.distributed.get_world_size() + self.process_index = torch.distributed.get_rank() + self.local_process_index = int(os.environ.get("LOCAL_RANK", -1)) + self.device = torch.device("cuda", self.local_process_index) + torch.cuda.set_device(self.device) + self.mixed_precision = mixed_precision + elif is_tpu_available() and not cpu: + self.distributed_type = DistributedType.TPU + self.num_processes = xm.xrt_world_size() + self.process_index = xm.get_ordinal() + self.local_process_index = xm.get_local_ordinal() + self.device = xm.xla_device() + if mixed_precision == "bf16": + os.environ["XLA_USE_BF16"] = str(1) + self.mixed_precision = mixed_precision + elif os.environ.get("USE_DEEPSPEED", "false") == "true" and not cpu: + assert ( + is_deepspeed_available() + ), "DeepSpeed is not available => install it using `pip3 install deepspeed` or build it from source" + self.distributed_type = DistributedType.DEEPSPEED + if not torch.distributed.is_initialized(): + torch.distributed.init_process_group(backend="nccl", **kwargs) + self.backend = "nccl" + self.num_processes = torch.distributed.get_world_size() + self.process_index = torch.distributed.get_rank() + self.local_process_index = int(os.environ.get("LOCAL_RANK", -1)) + self.device = torch.device("cuda", self.local_process_index) + torch.cuda.set_device(self.device) + self.mixed_precision = "no" # deepspeed handles mixed_precision using deepspeed_config + self.deepspeed_plugin = deepspeed_plugin + elif int(os.environ.get("LOCAL_RANK", -1)) != -1 and not cpu: + self.distributed_type = DistributedType.MULTI_GPU + if not torch.distributed.is_initialized(): + torch.distributed.init_process_group(backend="nccl", **kwargs) + self.backend = "nccl" + self.num_processes = torch.distributed.get_world_size() + self.process_index = torch.distributed.get_rank() + self.local_process_index = int(os.environ.get("LOCAL_RANK", -1)) + self.device = torch.device("cuda", self.local_process_index) + torch.cuda.set_device(self.device) + self.mixed_precision = mixed_precision + if os.environ.get("USE_FSDP", "false") == "true": + self.distributed_type = DistributedType.FSDP + if self.mixed_precision != "no": + fsdp_plugin.set_mixed_precision(self.mixed_precision) + self.fsdp_plugin = fsdp_plugin + elif get_int_from_env(["PMI_SIZE", "OMPI_COMM_WORLD_SIZE", "MV2_COMM_WORLD_SIZE", "WORLD_SIZE"], 1) > 1: + self.distributed_type = DistributedType.MULTI_CPU + if is_ccl_available() and get_int_from_env(["CCL_WORKER_COUNT"], 0) > 0: + backend = "ccl" + elif torch.distributed.is_mpi_available(): + backend = "mpi" + else: + backend = "gloo" + # Try to get launch configuration from environment variables set by MPI launcher - works for Intel MPI, OpenMPI and MVAPICH + rank = get_int_from_env(["RANK", "PMI_RANK", "OMPI_COMM_WORLD_RANK", "MV2_COMM_WORLD_RANK"], 0) + size = get_int_from_env(["WORLD_SIZE", "PMI_SIZE", "OMPI_COMM_WORLD_SIZE", "MV2_COMM_WORLD_SIZE"], 1) + local_rank = get_int_from_env( + ["LOCAL_RANK", "MPI_LOCALRANKID", "OMPI_COMM_WORLD_LOCAL_RANK", 
"MV2_COMM_WORLD_LOCAL_RANK"], 0 + ) + local_size = get_int_from_env( + ["MPI_LOCALNRANKS", "OMPI_COMM_WORLD_LOCAL_SIZE", "MV2_COMM_WORLD_LOCAL_SIZE"], 1 + ) + self.local_process_index = local_rank + os.environ["RANK"] = str(rank) + os.environ["WORLD_SIZE"] = str(size) + os.environ["LOCAL_RANK"] = str(local_rank) + if not os.environ.get("MASTER_PORT", None): + os.environ["MASTER_PORT"] = "29500" + if not os.environ.get("MASTER_ADDR", None): + if local_size != size and backend != "mpi": + raise ValueError( + "Looks like distributed multinode run but MASTER_ADDR env not set, " + "please try exporting rank 0's hostname as MASTER_ADDR" + ) + if not torch.distributed.is_initialized(): + torch.distributed.init_process_group(backend, rank=rank, world_size=size, **kwargs) + self.backend = backend + self.num_processes = torch.distributed.get_world_size() + self.process_index = torch.distributed.get_rank() + self.local_process_index = local_rank + self.device = torch.device("cpu") + self.mixed_precision = mixed_precision + else: + self.distributed_type = DistributedType.NO + self.num_processes = 1 + self.process_index = self.local_process_index = 0 + self.device = torch.device("cuda" if torch.cuda.is_available() and not cpu else "cpu") + self.mixed_precision = mixed_precision + self.initialized = True + + def __repr__(self): + mixed_precision = self.mixed_precision + + repr = ( + f"Distributed environment: {self.distributed_type}{(' Backend: ' + self.backend) if self.backend else ''}\n" + f"Num processes: {self.num_processes}\n" + f"Process index: {self.process_index}\n" + f"Local process index: {self.local_process_index}\n" + f"Device: {self.device}\n" + ) + if self.distributed_type == DistributedType.DEEPSPEED: + repr += f"ds_config: {self.deepspeed_plugin.deepspeed_config}\n" + else: + f"Mixed precision type: {mixed_precision}\n" + return repr + + # For backward compatibility + @property + def use_fp16(self): + return self.mixed_precision != "no" + + @staticmethod + def _reset_state(): + "Resets `_shared_state`, is used internally and should not be called" + AcceleratorState._shared_state = {} + + def _check_initialized(self, mixed_precision=None, cpu=None): + "Checks if a modification is trying to be made and the `AcceleratorState` has already been initialized" + if getattr(self, "initialized", False): + err = "AcceleratorState has already been initialized and cannot be changed, restart your runtime completely and pass `{flag}` to `Accelerate()`." + if cpu and self.device.type != "cpu": + raise ValueError(err.format(flag="cpu=True")) + if mixed_precision is not None and mixed_precision != self.mixed_precision: + raise ValueError(err.format(flag=f"mixed_precision='{mixed_precision}'")) + + +class GradientState: + """ + This is a variation of a [singleton class](https://en.wikipedia.org/wiki/Singleton_pattern) in the sense that all + instance of `GradientState` share the same state, which is initialized on the first instantiation. 
+ + This specific state revolves around whether gradients should be synced and whether we have reached the end of a + prepared dataloader. + + Attributes: + + - **sync_gradients** (`bool`) -- Whether the gradients should be synced + - **end_of_dataloader** (`bool`) -- Whether we have reached the end of the current dataloader + """ + + _shared_state = {} + + def __init__(self): + self.__dict__ = self._shared_state + if not getattr(self, "initialized", False): + self.sync_gradients = True + self.end_of_dataloader = False + self.initialized = True + + def __repr__(self): + return f"Sync Gradients: {self.sync_gradients}\n" f"At end of current dataloader: {self.end_of_dataloader}\n" + + def _set_sync_gradients(self, sync_gradients): + "Private function that sets whether gradients should be synchronized. Users should not have to call this." + self.sync_gradients = sync_gradients + + def _set_end_of_dataloader(self, end_of_dataloader): + "Private function that sets whether the end of the current dataloader has been reached. Users should not have to call this." + self.end_of_dataloader = end_of_dataloader diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/tracking.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/tracking.py new file mode 100644 index 0000000000000000000000000000000000000000..3efff77c9f6904376591fa274afc54505b34e877 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/accelerate/tracking.py @@ -0,0 +1,332 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Expectation: +# Provide a project dir name, then each type of logger gets stored in project/{`logging_dir`} + +import os +from abc import ABCMeta, abstractmethod, abstractproperty +from typing import List, Optional, Union + +from .logging import get_logger +from .utils import LoggerType, is_comet_ml_available, is_tensorboard_available, is_wandb_available + + +_available_trackers = [] + +if is_tensorboard_available(): + from torch.utils import tensorboard + + _available_trackers.append(LoggerType.TENSORBOARD) + +if is_wandb_available(): + import wandb + + _available_trackers.append(LoggerType.WANDB) + +if is_comet_ml_available(): + from comet_ml import Experiment + + _available_trackers.append(LoggerType.COMETML) + + +logger = get_logger(__name__) + + +def get_available_trackers(): + "Returns a list of all supported available trackers in the system" + return _available_trackers + + +class GeneralTracker(object, metaclass=ABCMeta): + """ + A base Tracker class to be used for all logging integration implementations. + """ + + @abstractproperty + def requires_logging_directory(self): + """ + Whether the logger requires a directory to store its logs. Should either return `True` or `False`. + """ + pass + + @abstractmethod + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run.
Implementations should use the experiment configuration + functionality of a tracking API. + + Args: + values (Dictionary `str` to `bool`, `str`, `float` or `int`): + Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`, + `str`, `float`, `int`, or `None`. + """ + pass + + @abstractmethod + def log(self, values: dict, step: Optional[int]): + """ + Logs `values` to the current run. Base `log` implementations of a tracking API should go in here, along with + special behavior for the `step` parameter. + + Args: + values (Dictionary `str` to `str`, `float`, or `int`): + Values to be logged as key-value pairs. The values need to have type `str`, `float`, or `int`. + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + """ + pass + + def finish(self): + """ + Should run any finalizing functions within the tracking API. If the API doesn't have one, simply don't + override this method. + """ + pass + + +class TensorBoardTracker(GeneralTracker): + """ + A `Tracker` class that supports `tensorboard`. Should be initialized at the start of your script. + + Args: + run_name (`str`): + The name of the experiment run + logging_dir (`str`, `os.PathLike`): + Location for TensorBoard logs to be stored. + """ + + requires_logging_directory = True + + def __init__(self, run_name: str, logging_dir: Optional[Union[str, os.PathLike]]): + self.run_name = run_name + self.logging_dir = os.path.join(logging_dir, run_name) + self.writer = tensorboard.SummaryWriter(self.logging_dir) + logger.info(f"Initialized TensorBoard project {self.run_name} logging to {self.logging_dir}") + logger.info( + "Make sure to log any initial configurations with `self.store_init_configuration` before training!" + ) + + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. + + Args: + values (Dictionary `str` to `bool`, `str`, `float` or `int`): + Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`, + `str`, `float`, `int`, or `None`. + """ + self.writer.add_hparams(values, metric_dict={}) + self.writer.flush() + logger.info("Stored initial configuration hyperparameters to TensorBoard") + + def log(self, values: dict, step: Optional[int] = None): + """ + Logs `values` to the current run. + + Args: + values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`): + Values to be logged as key-value pairs. The values need to have type `str`, `float`, `int` or `dict` of + `str` to `float`/`int`. + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + """ + for k, v in values.items(): + if isinstance(v, (int, float)): + self.writer.add_scalar(k, v, global_step=step) + elif isinstance(v, str): + self.writer.add_text(k, v, global_step=step) + elif isinstance(v, dict): + self.writer.add_scalars(k, v, global_step=step) + self.writer.flush() + logger.info("Successfully logged to TensorBoard") + + def finish(self): + """ + Closes `TensorBoard` writer + """ + self.writer.close() + logger.info("TensorBoard writer closed") + + +class WandBTracker(GeneralTracker): + """ + A `Tracker` class that supports `wandb`. Should be initialized at the start of your script. + + Args: + run_name (`str`): + The name of the experiment run.
+ """ + + requires_logging_directory = False + + def __init__(self, run_name: str): + self.run_name = run_name + self.run = wandb.init(project=self.run_name) + logger.info(f"Initialized WandB project {self.run_name}") + logger.info( + "Make sure to log any initial configurations with `self.store_init_configuration` before training!" + ) + + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. + + Args: + values (Dictionary `str` to `bool`, `str`, `float` or `int`): + Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`, + `str`, `float`, `int`, or `None`. + """ + wandb.config.update(values) + logger.info("Stored initial configuration hyperparameters to WandB") + + def log(self, values: dict, step: Optional[int] = None): + """ + Logs `values` to the current run. + + Args: + values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`): + Values to be logged as key-value pairs. The values need to have type `str`, `float`, `int` or `dict` of + `str` to `float`/`int`. + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + """ + self.run.log(values, step=step) + logger.info("Successfully logged to WandB") + + def finish(self): + """ + Closes `wandb` writer + """ + self.run.finish() + logger.info("WandB run closed") + + +class CometMLTracker(GeneralTracker): + """ + A `Tracker` class that supports `comet_ml`. Should be initialized at the start of your script. + + API keys must be stored in a Comet config file. + + Args: + run_name (`str`): + The name of the experiment run. + """ + + requires_logging_directory = False + + def __init__(self, run_name: str): + self.run_name = run_name + self.writer = Experiment(project_name=run_name) + logger.info(f"Initialized CometML project {self.run_name}") + logger.info( + "Make sure to log any initial configurations with `self.store_init_configuration` before training!" + ) + + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. + + Args: + values (Dictionary `str` to `bool`, `str`, `float` or `int`): + Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`, + `str`, `float`, `int`, or `None`. + """ + self.writer.log_parameters(values) + logger.info("Stored initial configuration hyperparameters to CometML") + + def log(self, values: dict, step: Optional[int] = None): + """ + Logs `values` to the current run. + + Args: + values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`): + Values to be logged as key-value pairs. The values need to have type `str`, `float`, `int` or `dict` of + `str` to `float`/`int`. + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. 
+ """ + if step is not None: + self.writer.set_step(step) + for k, v in values.items(): + if isinstance(v, (int, float)): + self.writer.log_metric(k, v, step=step) + elif isinstance(v, str): + self.writer.log_other(k, v) + elif isinstance(v, dict): + self.writer.log_metrics(v, step=step) + logger.info("Successfully logged to CometML") + + def finish(self): + """ + Closes `comet-ml` writer + """ + self.writer.end() + logger.info("CometML run closed") + + +LOGGER_TYPE_TO_CLASS = {"tensorboard": TensorBoardTracker, "wandb": WandBTracker, "comet_ml": CometMLTracker} + + +def filter_trackers( + log_with: List[Union[str, LoggerType, GeneralTracker]], logging_dir: Union[str, os.PathLike] = None +): + """ + Takes in a list of potential tracker types and checks that: + - The tracker wanted is available in that environment + - Filters out repeats of tracker types + - If `all` is in `log_with`, will return all trackers in the environment + - If a tracker requires a `logging_dir`, ensures that `logging_dir` is not `None` + + Args: + log_with (list of `str`, [`~utils.LoggerType`] or [`~tracking.GeneralTracker`], *optional*): + A list of loggers to be setup for experiment tracking. Should be one or several of: + + - `"all"` + - `"tensorboard"` + - `"wandb"` + - `"comet_ml"` + If `"all`" is selected, will pick up all available trackers in the environment and intialize them. Can also + accept implementations of `GeneralTracker` for custom trackers, and can be combined with `"all"`. + logging_dir (`str`, `os.PathLike`, *optional*): + A path to a directory for storing logs of locally-compatible loggers. + """ + loggers = [] + if log_with is not None: + if not isinstance(log_with, (list, tuple)): + log_with = [log_with] + logger.debug(f"{log_with}") + if "all" in log_with or LoggerType.ALL in log_with: + loggers = [o for o in log_with if issubclass(type(o), GeneralTracker)] + get_available_trackers() + else: + for log_type in log_with: + if log_type not in LoggerType and not issubclass(type(log_type), GeneralTracker): + raise ValueError(f"Unsupported logging capability: {log_type}. Choose between {LoggerType.list()}") + if issubclass(type(log_type), GeneralTracker): + loggers.append(log_type) + else: + log_type = LoggerType(log_type) + if log_type not in loggers: + if log_type in get_available_trackers(): + tracker_init = LOGGER_TYPE_TO_CLASS[str(log_type)] + if getattr(tracker_init, "requires_logging_directory"): + if logging_dir is None: + raise ValueError( + f"Logging with `{str(log_type)}` requires a `logging_dir` to be passed in." + ) + loggers.append(log_type) + else: + logger.info(f"Tried adding logger {log_type}, but package is unavailable in the system.") + + return loggers diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f9f820ef6e5f21042efd0e9dc18d097388d7207 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/__init__.py @@ -0,0 +1,39 @@ +######################## BEGIN LICENSE BLOCK ######################## +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + + +from .compat import PY2, PY3 +from .universaldetector import UniversalDetector +from .version import __version__, VERSION + + +def detect(byte_str): + """ + Detect the encoding of the given byte string. + + :param byte_str: The byte sequence to examine. + :type byte_str: ``bytes`` or ``bytearray`` + """ + if not isinstance(byte_str, bytearray): + if not isinstance(byte_str, bytes): + raise TypeError('Expected object of type bytes or bytearray, got: ' + '{0}'.format(type(byte_str))) + else: + byte_str = bytearray(byte_str) + detector = UniversalDetector() + detector.feed(byte_str) + return detector.close() diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/codingstatemachine.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/codingstatemachine.py new file mode 100644 index 0000000000000000000000000000000000000000..68fba44f14366c448f13db3cf9cf1665af2e498c --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/codingstatemachine.py @@ -0,0 +1,88 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +import logging + +from .enums import MachineState + + +class CodingStateMachine(object): + """ + A state machine to verify a byte sequence for a particular encoding. For + each byte the detector receives, it will feed that byte to every active + state machine available, one byte at a time. The state machine changes its + state based on its previous state and the byte it receives. There are 3 + states in a state machine that are of interest to an auto-detector: + + START state: This is the state to start with, or a legal byte sequence + (i.e. a valid code point) for a character has been identified.
+ + ME state: This indicates that the state machine identified a byte sequence + that is specific to the charset it is designed for and that + there is no other possible encoding which can contain this byte + sequence. This will lead to an immediate positive answer for + the detector. + + ERROR state: This indicates the state machine identified an illegal byte + sequence for that encoding. This will lead to an immediate + negative answer for this encoding. Detector will exclude this + encoding from consideration from here on. + """ + def __init__(self, sm): + self._model = sm + self._curr_byte_pos = 0 + self._curr_char_len = 0 + self._curr_state = None + self.logger = logging.getLogger(__name__) + self.reset() + + def reset(self): + self._curr_state = MachineState.START + + def next_state(self, c): + # for each byte we get its class + # if it is first byte, we also get byte length + byte_class = self._model['class_table'][c] + if self._curr_state == MachineState.START: + self._curr_byte_pos = 0 + self._curr_char_len = self._model['char_len_table'][byte_class] + # from byte's class and state_table, we get its next state + curr_state = (self._curr_state * self._model['class_factor'] + + byte_class) + self._curr_state = self._model['state_table'][curr_state] + self._curr_byte_pos += 1 + return self._curr_state + + def get_current_charlen(self): + return self._curr_char_len + + def get_coding_state_machine(self): + return self._model['name'] + + @property + def language(self): + return self._model['language'] diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/cp949prober.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/cp949prober.py new file mode 100644 index 0000000000000000000000000000000000000000..efd793abca4bf496001a4e46a67557e5a6f16bba --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/cp949prober.py @@ -0,0 +1,49 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .chardistribution import EUCKRDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber +from .mbcssm import CP949_SM_MODEL + + +class CP949Prober(MultiByteCharSetProber): + def __init__(self): + super(CP949Prober, self).__init__() + self.coding_sm = CodingStateMachine(CP949_SM_MODEL) + # NOTE: CP949 is a superset of EUC-KR, so the distribution should not + # be different. + self.distribution_analyzer = EUCKRDistributionAnalysis() + self.reset() + + @property + def charset_name(self): + return "CP949" + + @property + def language(self): + return "Korean" diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/enums.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/enums.py new file mode 100644 index 0000000000000000000000000000000000000000..04512072251c429e63ed110cdbafaf4b3cca3412 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/enums.py @@ -0,0 +1,76 @@ +""" +All of the Enums that are used throughout the chardet package. + +:author: Dan Blanchard (dan.blanchard@gmail.com) +""" + + +class InputState(object): + """ + This enum represents the different states a universal detector can be in. + """ + PURE_ASCII = 0 + ESC_ASCII = 1 + HIGH_BYTE = 2 + + +class LanguageFilter(object): + """ + This enum represents the different language filters we can apply to a + ``UniversalDetector``. + """ + CHINESE_SIMPLIFIED = 0x01 + CHINESE_TRADITIONAL = 0x02 + JAPANESE = 0x04 + KOREAN = 0x08 + NON_CJK = 0x10 + ALL = 0x1F + CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL + CJK = CHINESE | JAPANESE | KOREAN + + +class ProbingState(object): + """ + This enum represents the different states a prober can be in. + """ + DETECTING = 0 + FOUND_IT = 1 + NOT_ME = 2 + + +class MachineState(object): + """ + This enum represents the different states a state machine can be in. + """ + START = 0 + ERROR = 1 + ITS_ME = 2 + + +class SequenceLikelihood(object): + """ + This enum represents the likelihood of a character following the previous one. + """ + NEGATIVE = 0 + UNLIKELY = 1 + LIKELY = 2 + POSITIVE = 3 + + @classmethod + def get_num_categories(cls): + """:returns: The number of likelihood categories in the enum.""" + return 4 + + +class CharacterCategory(object): + """ + This enum represents the different categories language models for + ``SingleByteCharsetProber`` put characters into. + + Anything less than CONTROL is considered a letter. + """ + UNDEFINED = 255 + LINE_BREAK = 254 + SYMBOL = 253 + DIGIT = 252 + CONTROL = 251 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/eucjpprober.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/eucjpprober.py new file mode 100644 index 0000000000000000000000000000000000000000..20ce8f7d15bad9d48a3e0363de2286093a4a28cc --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/eucjpprober.py @@ -0,0 +1,92 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .enums import ProbingState, MachineState
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCJPDistributionAnalysis
+from .jpcntx import EUCJPContextAnalysis
+from .mbcssm import EUCJP_SM_MODEL
+
+
+class EUCJPProber(MultiByteCharSetProber):
+    def __init__(self):
+        super(EUCJPProber, self).__init__()
+        self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
+        self.distribution_analyzer = EUCJPDistributionAnalysis()
+        self.context_analyzer = EUCJPContextAnalysis()
+        self.reset()
+
+    def reset(self):
+        super(EUCJPProber, self).reset()
+        self.context_analyzer.reset()
+
+    @property
+    def charset_name(self):
+        return "EUC-JP"
+
+    @property
+    def language(self):
+        return "Japanese"
+
+    def feed(self, byte_str):
+        for i in range(len(byte_str)):
+            # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
+            coding_state = self.coding_sm.next_state(byte_str[i])
+            if coding_state == MachineState.ERROR:
+                self.logger.debug('%s %s prober hit error at byte %s',
+                                  self.charset_name, self.language, i)
+                self._state = ProbingState.NOT_ME
+                break
+            elif coding_state == MachineState.ITS_ME:
+                self._state = ProbingState.FOUND_IT
+                break
+            elif coding_state == MachineState.START:
+                char_len = self.coding_sm.get_current_charlen()
+                if i == 0:
+                    self._last_char[1] = byte_str[0]
+                    self.context_analyzer.feed(self._last_char, char_len)
+                    self.distribution_analyzer.feed(self._last_char, char_len)
+                else:
+                    self.context_analyzer.feed(byte_str[i - 1:i + 1],
+                                               char_len)
+                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
+                                                    char_len)
+
+        self._last_char[0] = byte_str[-1]
+
+        if self.state == ProbingState.DETECTING:
+            if (self.context_analyzer.got_enough_data() and
+                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
+                self._state = ProbingState.FOUND_IT
+
+        return self.state
+
+    def get_confidence(self):
+        context_conf = self.context_analyzer.get_confidence()
+        distrib_conf = self.distribution_analyzer.get_confidence()
+        return max(context_conf, distrib_conf)
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/euctwprober.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/euctwprober.py
new file mode 100644
index 0000000000000000000000000000000000000000..35669cc4dd809fa4007ee67c752f1be991135e77
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/euctwprober.py
@@ -0,0 +1,46 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCTWDistributionAnalysis
+from .mbcssm import EUCTW_SM_MODEL
+
+class EUCTWProber(MultiByteCharSetProber):
+    def __init__(self):
+        super(EUCTWProber, self).__init__()
+        self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
+        self.distribution_analyzer = EUCTWDistributionAnalysis()
+        self.reset()
+
+    @property
+    def charset_name(self):
+        return "EUC-TW"
+
+    @property
+    def language(self):
+        return "Taiwan"
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/gb2312prober.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/gb2312prober.py
new file mode 100644
index 0000000000000000000000000000000000000000..8446d2dd959721cc86d4ae5a7699197454f3aa91
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/gb2312prober.py
@@ -0,0 +1,46 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import GB2312DistributionAnalysis
+from .mbcssm import GB2312_SM_MODEL
+
+class GB2312Prober(MultiByteCharSetProber):
+    def __init__(self):
+        super(GB2312Prober, self).__init__()
+        self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
+        self.distribution_analyzer = GB2312DistributionAnalysis()
+        self.reset()
+
+    @property
+    def charset_name(self):
+        return "GB2312"
+
+    @property
+    def language(self):
+        return "Chinese"
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/hebrewprober.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/hebrewprober.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0e1bf49268203d1f9d14cbe73753d95dc66c8a4
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/hebrewprober.py
@@ -0,0 +1,292 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Shy Shalom
+# Portions created by the Initial Developer are Copyright (C) 2005
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetprober import CharSetProber
+from .enums import ProbingState
+
+# This prober doesn't actually recognize a language or a charset.
+# It is a helper prober for the use of the Hebrew model probers
+
+### General ideas of the Hebrew charset recognition ###
+#
+# Four main charsets exist in Hebrew:
+# "ISO-8859-8" - Visual Hebrew
+# "windows-1255" - Logical Hebrew
+# "ISO-8859-8-I" - Logical Hebrew
+# "x-mac-hebrew" - ?? Logical Hebrew ??
+#
+# Both "ISO" charsets use a completely identical set of code points, whereas
+# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
+# these code points. windows-1255 defines additional characters in the range
+# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
+# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
+# x-mac-hebrew defines similar additional code points but with a different
+# mapping.
+#
+# As far as an average Hebrew text with no diacritics is concerned, all four
+# charsets are identical with respect to code points.
Meaning that for the +# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters +# (including final letters). +# +# The dominant difference between these charsets is their directionality. +# "Visual" directionality means that the text is ordered as if the renderer is +# not aware of a BIDI rendering algorithm. The renderer sees the text and +# draws it from left to right. The text itself when ordered naturally is read +# backwards. A buffer of Visual Hebrew generally looks like so: +# "[last word of first line spelled backwards] [whole line ordered backwards +# and spelled backwards] [first word of first line spelled backwards] +# [end of line] [last word of second line] ... etc' " +# adding punctuation marks, numbers and English text to visual text is +# naturally also "visual" and from left to right. +# +# "Logical" directionality means the text is ordered "naturally" according to +# the order it is read. It is the responsibility of the renderer to display +# the text from right to left. A BIDI algorithm is used to place general +# punctuation marks, numbers and English text in the text. +# +# Texts in x-mac-hebrew are almost impossible to find on the Internet. From +# what little evidence I could find, it seems that its general directionality +# is Logical. +# +# To sum up all of the above, the Hebrew probing mechanism knows about two +# charsets: +# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are +# backwards while line order is natural. For charset recognition purposes +# the line order is unimportant (In fact, for this implementation, even +# word order is unimportant). +# Logical Hebrew - "windows-1255" - normal, naturally ordered text. +# +# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be +# specifically identified. +# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew +# that contain special punctuation marks or diacritics is displayed with +# some unconverted characters showing as question marks. This problem might +# be corrected using another model prober for x-mac-hebrew. Due to the fact +# that x-mac-hebrew texts are so rare, writing another model prober isn't +# worth the effort and performance hit. +# +#### The Prober #### +# +# The prober is divided between two SBCharSetProbers and a HebrewProber, +# all of which are managed, created, fed data, inquired and deleted by the +# SBCSGroupProber. The two SBCharSetProbers identify that the text is in +# fact some kind of Hebrew, Logical or Visual. The final decision about which +# one is it is made by the HebrewProber by combining final-letter scores +# with the scores of the two SBCharSetProbers to produce a final answer. +# +# The SBCSGroupProber is responsible for stripping the original text of HTML +# tags, English characters, numbers, low-ASCII punctuation characters, spaces +# and new lines. It reduces any sequence of such characters to a single space. +# The buffer fed to each prober in the SBCS group prober is pure text in +# high-ASCII. +# The two SBCharSetProbers (model probers) share the same language model: +# Win1255Model. +# The first SBCharSetProber uses the model normally as any other +# SBCharSetProber does, to recognize windows-1255, upon which this model was +# built. The second SBCharSetProber is told to make the pair-of-letter +# lookup in the language model backwards. This in practice exactly simulates +# a visual Hebrew model using the windows-1255 logical Hebrew model. 
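+#
+# A minimal sketch of that reversed lookup (illustrative only; pair_category
+# and model are hypothetical names, not part of this module or of the
+# original Mozilla code):
+#
+#   def pair_category(model, prev, cur, reverse=False):
+#       # model maps an ordered pair of letters to a frequency category.
+#       # Swapping the pair before the lookup is equivalent to reading the
+#       # buffer backwards, which turns a logical-Hebrew language model
+#       # into a visual-Hebrew one.
+#       if reverse:
+#           prev, cur = cur, prev
+#       return model.get((prev, cur), 0)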
+# +# The HebrewProber is not using any language model. All it does is look for +# final-letter evidence suggesting the text is either logical Hebrew or visual +# Hebrew. Disjointed from the model probers, the results of the HebrewProber +# alone are meaningless. HebrewProber always returns 0.00 as confidence +# since it never identifies a charset by itself. Instead, the pointer to the +# HebrewProber is passed to the model probers as a helper "Name Prober". +# When the Group prober receives a positive identification from any prober, +# it asks for the name of the charset identified. If the prober queried is a +# Hebrew model prober, the model prober forwards the call to the +# HebrewProber to make the final decision. In the HebrewProber, the +# decision is made according to the final-letters scores maintained and Both +# model probers scores. The answer is returned in the form of the name of the +# charset identified, either "windows-1255" or "ISO-8859-8". + +class HebrewProber(CharSetProber): + # windows-1255 / ISO-8859-8 code points of interest + FINAL_KAF = 0xea + NORMAL_KAF = 0xeb + FINAL_MEM = 0xed + NORMAL_MEM = 0xee + FINAL_NUN = 0xef + NORMAL_NUN = 0xf0 + FINAL_PE = 0xf3 + NORMAL_PE = 0xf4 + FINAL_TSADI = 0xf5 + NORMAL_TSADI = 0xf6 + + # Minimum Visual vs Logical final letter score difference. + # If the difference is below this, don't rely solely on the final letter score + # distance. + MIN_FINAL_CHAR_DISTANCE = 5 + + # Minimum Visual vs Logical model score difference. + # If the difference is below this, don't rely at all on the model score + # distance. + MIN_MODEL_DISTANCE = 0.01 + + VISUAL_HEBREW_NAME = "ISO-8859-8" + LOGICAL_HEBREW_NAME = "windows-1255" + + def __init__(self): + super(HebrewProber, self).__init__() + self._final_char_logical_score = None + self._final_char_visual_score = None + self._prev = None + self._before_prev = None + self._logical_prober = None + self._visual_prober = None + self.reset() + + def reset(self): + self._final_char_logical_score = 0 + self._final_char_visual_score = 0 + # The two last characters seen in the previous buffer, + # mPrev and mBeforePrev are initialized to space in order to simulate + # a word delimiter at the beginning of the data + self._prev = ' ' + self._before_prev = ' ' + # These probers are owned by the group prober. + + def set_model_probers(self, logicalProber, visualProber): + self._logical_prober = logicalProber + self._visual_prober = visualProber + + def is_final(self, c): + return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN, + self.FINAL_PE, self.FINAL_TSADI] + + def is_non_final(self, c): + # The normal Tsadi is not a good Non-Final letter due to words like + # 'lechotet' (to chat) containing an apostrophe after the tsadi. This + # apostrophe is converted to a space in FilterWithoutEnglishLetters + # causing the Non-Final tsadi to appear at an end of a word even + # though this is not the case in the original text. + # The letters Pe and Kaf rarely display a related behavior of not being + # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' + # for example legally end with a Non-Final Pe or Kaf. However, the + # benefit of these letters as Non-Final letters outweighs the damage + # since these words are quite rare. + return c in [self.NORMAL_KAF, self.NORMAL_MEM, + self.NORMAL_NUN, self.NORMAL_PE] + + def feed(self, byte_str): + # Final letter analysis for logical-visual decision. + # Look for evidence that the received buffer is either logical Hebrew + # or visual Hebrew. 
+ # The following cases are checked: + # 1) A word longer than 1 letter, ending with a final letter. This is + # an indication that the text is laid out "naturally" since the + # final letter really appears at the end. +1 for logical score. + # 2) A word longer than 1 letter, ending with a Non-Final letter. In + # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, + # should not end with the Non-Final form of that letter. Exceptions + # to this rule are mentioned above in isNonFinal(). This is an + # indication that the text is laid out backwards. +1 for visual + # score + # 3) A word longer than 1 letter, starting with a final letter. Final + # letters should not appear at the beginning of a word. This is an + # indication that the text is laid out backwards. +1 for visual + # score. + # + # The visual score and logical score are accumulated throughout the + # text and are finally checked against each other in GetCharSetName(). + # No checking for final letters in the middle of words is done since + # that case is not an indication for either Logical or Visual text. + # + # We automatically filter out all 7-bit characters (replace them with + # spaces) so the word boundary detection works properly. [MAP] + + if self.state == ProbingState.NOT_ME: + # Both model probers say it's not them. No reason to continue. + return ProbingState.NOT_ME + + byte_str = self.filter_high_byte_only(byte_str) + + for cur in byte_str: + if cur == ' ': + # We stand on a space - a word just ended + if self._before_prev != ' ': + # next-to-last char was not a space so self._prev is not a + # 1 letter word + if self.is_final(self._prev): + # case (1) [-2:not space][-1:final letter][cur:space] + self._final_char_logical_score += 1 + elif self.is_non_final(self._prev): + # case (2) [-2:not space][-1:Non-Final letter][ + # cur:space] + self._final_char_visual_score += 1 + else: + # Not standing on a space + if ((self._before_prev == ' ') and + (self.is_final(self._prev)) and (cur != ' ')): + # case (3) [-2:space][-1:final letter][cur:not space] + self._final_char_visual_score += 1 + self._before_prev = self._prev + self._prev = cur + + # Forever detecting, till the end or until both model probers return + # ProbingState.NOT_ME (handled above) + return ProbingState.DETECTING + + @property + def charset_name(self): + # Make the decision: is it Logical or Visual? + # If the final letter score distance is dominant enough, rely on it. + finalsub = self._final_char_logical_score - self._final_char_visual_score + if finalsub >= self.MIN_FINAL_CHAR_DISTANCE: + return self.LOGICAL_HEBREW_NAME + if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE: + return self.VISUAL_HEBREW_NAME + + # It's not dominant enough, try to rely on the model scores instead. + modelsub = (self._logical_prober.get_confidence() + - self._visual_prober.get_confidence()) + if modelsub > self.MIN_MODEL_DISTANCE: + return self.LOGICAL_HEBREW_NAME + if modelsub < -self.MIN_MODEL_DISTANCE: + return self.VISUAL_HEBREW_NAME + + # Still no good, back to final letter distance, maybe it'll save the + # day. + if finalsub < 0.0: + return self.VISUAL_HEBREW_NAME + + # (finalsub > 0 - Logical) or (don't know what to do) default to + # Logical. + return self.LOGICAL_HEBREW_NAME + + @property + def language(self): + return 'Hebrew' + + @property + def state(self): + # Remain active as long as any of the model probers are active. 
+ if (self._logical_prober.state == ProbingState.NOT_ME) and \ + (self._visual_prober.state == ProbingState.NOT_ME): + return ProbingState.NOT_ME + return ProbingState.DETECTING diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/jpcntx.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/jpcntx.py new file mode 100644 index 0000000000000000000000000000000000000000..20044e4bc8f5a9775d82be0ecf387ce752732507 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/jpcntx.py @@ -0,0 +1,233 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + + +# This is hiragana 2-char sequence table, the number in each cell represents its frequency category +jp2CharContext = ( +(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), +(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), +(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), +(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), 
+(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), +(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), +(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), +(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), +(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), +(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), +(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), +(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), +(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), +(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), +(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), +(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), +(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), +(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), +(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), +(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), +(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), +(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), +(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), +(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), 
+(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), +(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), +(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), +(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), +(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), +(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), +(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), +(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), +(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), +(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), +(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), +(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), +(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), +(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), +(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), +(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), +(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), +(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), +(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), +(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), 
+(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), +(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), +(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), +(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), +(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), +(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), +(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), +(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), +(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), +(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), +(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), +(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), +(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), +(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), +(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), +(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), +(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), +(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), +(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), +(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), 
+(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), +(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), +(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), +(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), +(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), +(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), +(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), +(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), +(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), +(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), +(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), +) + +class JapaneseContextAnalysis(object): + NUM_OF_CATEGORY = 6 + DONT_KNOW = -1 + ENOUGH_REL_THRESHOLD = 100 + MAX_REL_THRESHOLD = 1000 + MINIMUM_DATA_THRESHOLD = 4 + + def __init__(self): + self._total_rel = None + self._rel_sample = None + self._need_to_skip_char_num = None + self._last_char_order = None + self._done = None + self.reset() + + def reset(self): + self._total_rel = 0 # total sequence received + # category counters, each integer counts sequence in its category + self._rel_sample = [0] * self.NUM_OF_CATEGORY + # if last byte in current buffer is not the last byte of a character, + # we need to know how many bytes to skip in next buffer + self._need_to_skip_char_num = 0 + self._last_char_order = -1 # The order of previous char + # If this flag is set to True, detection is done and conclusion has + # been made + self._done = False + + def feed(self, byte_str, num_bytes): + if self._done: + return + + # The buffer we got is byte oriented, and a character may span in more than one + # buffers. 
In case the last one or two bytes in the last buffer are not
+        # complete, we record how many bytes are needed to complete that
+        # character and skip these bytes here. We could record those bytes
+        # as well and analyse the character once it is complete, but since
+        # one character will not make much difference, simply skipping it
+        # simplifies our logic and improves performance.
+        i = self._need_to_skip_char_num
+        while i < num_bytes:
+            order, char_len = self.get_order(byte_str[i:i + 2])
+            i += char_len
+            if i > num_bytes:
+                self._need_to_skip_char_num = i - num_bytes
+                self._last_char_order = -1
+            else:
+                if (order != -1) and (self._last_char_order != -1):
+                    self._total_rel += 1
+                    if self._total_rel > self.MAX_REL_THRESHOLD:
+                        self._done = True
+                        break
+                    self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
+                self._last_char_order = order
+
+    def got_enough_data(self):
+        return self._total_rel > self.ENOUGH_REL_THRESHOLD
+
+    def get_confidence(self):
+        # This is just one way to calculate confidence. It works well for me.
+        if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
+            return (self._total_rel - self._rel_sample[0]) / self._total_rel
+        else:
+            return self.DONT_KNOW
+
+    def get_order(self, byte_str):
+        return -1, 1
+
+class SJISContextAnalysis(JapaneseContextAnalysis):
+    def __init__(self):
+        super(SJISContextAnalysis, self).__init__()
+        self._charset_name = "SHIFT_JIS"
+
+    @property
+    def charset_name(self):
+        return self._charset_name
+
+    def get_order(self, byte_str):
+        if not byte_str:
+            return -1, 1
+        # find out current char's byte length
+        first_char = byte_str[0]
+        if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
+            char_len = 2
+            if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
+                self._charset_name = "CP932"
+        else:
+            char_len = 1
+
+        # return its order if it is hiragana
+        if len(byte_str) > 1:
+            second_char = byte_str[1]
+            if (first_char == 202) and (0x9F <= second_char <= 0xF1):
+                return second_char - 0x9F, char_len
+
+        return -1, char_len
+
+class EUCJPContextAnalysis(JapaneseContextAnalysis):
+    def get_order(self, byte_str):
+        if not byte_str:
+            return -1, 1
+        # find out current char's byte length
+        first_char = byte_str[0]
+        if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
+            char_len = 2
+        elif first_char == 0x8F:
+            char_len = 3
+        else:
+            char_len = 1
+
+        # return its order if it is hiragana
+        if len(byte_str) > 1:
+            second_char = byte_str[1]
+            if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
+                return second_char - 0xA1, char_len
+
+        return -1, char_len
+
+
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langcyrillicmodel.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langcyrillicmodel.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5f9a1fd19cca472d785bf90c6395cd11b524766
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langcyrillicmodel.py
@@ -0,0 +1,333 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# KOI8-R language model +# Character Mapping Table: +KOI8R_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 +223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 + 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 + 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 + 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 + 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 +) + +win1251_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +) + +latin5_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +macCyrillic_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, +) + +IBM855_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, +206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, + 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, +220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, +230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, + 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, + 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, +250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, +) + +IBM866_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 
64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 97.6601% +# first 1024 sequences: 2.3389% +# rest sequences: 0.1237% +# negative sequences: 0.0009% +RussianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, +1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, +1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, +2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, +1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, +3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, +1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, +2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, +1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, +1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, +1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, +3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, +1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, +2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, +1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, +2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, +1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, +1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, +1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, +3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, +3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, +1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, +1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, +0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, +1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, +1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 
+1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, +1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, +1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, +1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, +2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, +1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, +1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, +0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, +0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, +0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, +2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +) + +Koi8rModel = { + 'char_to_order_map': KOI8R_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "KOI8-R", + 'language': 'Russian', +} + +Win1251CyrillicModel = { + 'char_to_order_map': win1251_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "windows-1251", + 'language': 'Russian', +} + +Latin5CyrillicModel = { + 'char_to_order_map': latin5_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "ISO-8859-5", + 'language': 'Russian', +} + +MacCyrillicModel = { + 'char_to_order_map': macCyrillic_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "MacCyrillic", + 'language': 'Russian', +} + +Ibm866Model = { + 'char_to_order_map': IBM866_char_to_order_map, + 'precedence_matrix': RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "IBM866", + 'language': 'Russian', +} + +Ibm855Model = { + 'char_to_order_map': IBM855_char_to_order_map, + 'precedence_matrix': 
RussianLangModel, + 'typical_positive_ratio': 0.976601, + 'keep_english_letter': False, + 'charset_name': "IBM855", + 'language': 'Russian', +} diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langgreekmodel.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langgreekmodel.py new file mode 100644 index 0000000000000000000000000000000000000000..533222166cca9fce442655d9f3098126f50e6140 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langgreekmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin7_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +win1253_char_to_order_map = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 
67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.2851% +# first 1024 sequences:1.7001% +# rest sequences: 0.0359% +# negative sequences: 0.0148% +GreekLangModel = ( +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, 
+2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, 
+0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, +0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, +0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, +0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin7GreekModel = { + 'char_to_order_map': Latin7_char_to_order_map, + 'precedence_matrix': GreekLangModel, + 'typical_positive_ratio': 0.982851, + 'keep_english_letter': False, + 'charset_name': "ISO-8859-7", + 'language': 'Greek', +} + +Win1253GreekModel = { + 'char_to_order_map': win1253_char_to_order_map, + 'precedence_matrix': GreekLangModel, + 'typical_positive_ratio': 0.982851, + 'keep_english_letter': False, + 'charset_name': "windows-1253", + 'language': 'Greek', +} diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langthaimodel.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langthaimodel.py new file mode 100644 index 0000000000000000000000000000000000000000..15f94c2df021c9cccc761ebeec80146edbb000c9 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langthaimodel.py @@ -0,0 +1,199 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. 
+# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# The following result for thai was collected from a limited sample (1M). + +# Character Mapping Table: +TIS620CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, + 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, + 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 92.6386% +# first 1024 sequences:7.3177% +# rest sequences: 1.0230% +# negative sequences: 0.0436% +ThaiLangModel = ( +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, +3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, 
+0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, +0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, +3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, 
+3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, +3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, +0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +TIS620ThaiModel = { + 'char_to_order_map': TIS620CharToOrderMap, + 'precedence_matrix': ThaiLangModel, + 'typical_positive_ratio': 0.926386, + 'keep_english_letter': False, + 'charset_name': "TIS-620", + 'language': 'Thai', +} diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langturkishmodel.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langturkishmodel.py new file mode 100644 index 0000000000000000000000000000000000000000..a427a457398de8076cdcefb5a6c391e89500bce8 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/langturkishmodel.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Özgür Baskın - Turkish Language Model +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
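As the comment above notes, the Thai statistics were collected from a limited (1M) sample, which is why TIS620ThaiModel's typical_positive_ratio (0.926) sits below the Greek models' 0.983. Callers normally reach these models through the top-level API rather than instantiating probers by hand; a short sketch (sample text illustrative):

    # End-to-end detection; the single-byte Thai prober does the work here.
    import chardet

    data = ('สวัสดีชาวโลก ' * 10).encode('tis-620')
    print(chardet.detect(data))
    # e.g. {'encoding': 'TIS-620', 'confidence': 0.9..., 'language': 'Thai'}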
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin5_TurkishCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42, + 48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255, +255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15, + 26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255, +180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165, +164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106, +150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136, + 94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125, +124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119, + 68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86, + 89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96, + 30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107, +) + +TurkishLangModel = ( +3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3, +3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1, +3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3, +3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1, +3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3, +3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1, +3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2, +2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1, +3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2, +2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0, +1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2, +3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1, +3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2, +2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1, +3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2, +2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, +3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2, +3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0, +3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3, +0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1, +3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1, +0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0, +3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1, +3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3, +2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3, 
+2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1, +3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0, +0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0, +1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2, +3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1, +1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0, +3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0, +0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0, +3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1, +0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1, +1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0, +1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3, +2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1, +2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1, +2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0, +3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0, +0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, +3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1, +1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2, +0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1, +3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1, +0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0, +3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0, +3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0, +1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2, +2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1, +0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0, +3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0, +0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0, +0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0, +3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0, +0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0, +0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0, +3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0, +0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1, +3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0, 
+0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1, +0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0, +3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0, +0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0, +0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0, +3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0, +0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0, +0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0, +0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0, +0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0, +0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0, +0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0, +0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0, +1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0, +0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1, +0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0, +3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0, +0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0, +2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0, +2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0, +0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1, +0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin5TurkishModel = { + 'char_to_order_map': Latin5_TurkishCharToOrderMap, + 'precedence_matrix': TurkishLangModel, + 'typical_positive_ratio': 0.970290, + 'keep_english_letter': True, + 'charset_name': "ISO-8859-9", + 'language': 'Turkish', +} diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/mbcsgroupprober.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/mbcsgroupprober.py new file mode 100644 index 
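Note that Latin5TurkishModel is the only model in this group with keep_english_letter set to True: Turkish is written in Latin script, so the ASCII letters carry real signal instead of being noise to discard. A quick sketch comparing the flags (assumes the modules above are importable):

    # The Latin-script Turkish model keeps ASCII letters; the Thai model drops them.
    from chardet.langturkishmodel import Latin5TurkishModel
    from chardet.langthaimodel import TIS620ThaiModel

    for model in (Latin5TurkishModel, TIS620ThaiModel):
        print(model['charset_name'], model['keep_english_letter'])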
0000000000000000000000000000000000000000..530abe75e0c00cbfcb2a310d872866f320977d0a --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/mbcsgroupprober.py @@ -0,0 +1,54 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .utf8prober import UTF8Prober +from .sjisprober import SJISProber +from .eucjpprober import EUCJPProber +from .gb2312prober import GB2312Prober +from .euckrprober import EUCKRProber +from .cp949prober import CP949Prober +from .big5prober import Big5Prober +from .euctwprober import EUCTWProber + + +class MBCSGroupProber(CharSetGroupProber): + def __init__(self, lang_filter=None): + super(MBCSGroupProber, self).__init__(lang_filter=lang_filter) + self.probers = [ + UTF8Prober(), + SJISProber(), + EUCJPProber(), + GB2312Prober(), + EUCKRProber(), + CP949Prober(), + Big5Prober(), + EUCTWProber() + ] + self.reset() diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sbcsgroupprober.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sbcsgroupprober.py new file mode 100644 index 0000000000000000000000000000000000000000..98e95dc1a3cbc65e97bc726ab7000955132719dd --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sbcsgroupprober.py @@ -0,0 +1,73 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .sbcharsetprober import SingleByteCharSetProber +from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, + Latin5CyrillicModel, MacCyrillicModel, + Ibm866Model, Ibm855Model) +from .langgreekmodel import Latin7GreekModel, Win1253GreekModel +from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from .langthaimodel import TIS620ThaiModel +from .langhebrewmodel import Win1255HebrewModel +from .hebrewprober import HebrewProber +from .langturkishmodel import Latin5TurkishModel + + +class SBCSGroupProber(CharSetGroupProber): + def __init__(self): + super(SBCSGroupProber, self).__init__() + self.probers = [ + SingleByteCharSetProber(Win1251CyrillicModel), + SingleByteCharSetProber(Koi8rModel), + SingleByteCharSetProber(Latin5CyrillicModel), + SingleByteCharSetProber(MacCyrillicModel), + SingleByteCharSetProber(Ibm866Model), + SingleByteCharSetProber(Ibm855Model), + SingleByteCharSetProber(Latin7GreekModel), + SingleByteCharSetProber(Win1253GreekModel), + SingleByteCharSetProber(Latin5BulgarianModel), + SingleByteCharSetProber(Win1251BulgarianModel), + # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250) + # after we retrain model. + # SingleByteCharSetProber(Latin2HungarianModel), + # SingleByteCharSetProber(Win1250HungarianModel), + SingleByteCharSetProber(TIS620ThaiModel), + SingleByteCharSetProber(Latin5TurkishModel), + ] + hebrew_prober = HebrewProber() + logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, + False, hebrew_prober) + visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True, + hebrew_prober) + hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) + self.probers.extend([hebrew_prober, logical_hebrew_prober, + visual_hebrew_prober]) + + self.reset() diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sjisprober.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sjisprober.py new file mode 100644 index 0000000000000000000000000000000000000000..9e29623bdc54a7c6d11bcc167d71bb44cc9be39d --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/sjisprober.py @@ -0,0 +1,92 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
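Both group probers are thin CharSetGroupProber subclasses whose only real job is to populate self.probers; the base class races the candidates and exposes the current best guess. A hedged sketch of driving one directly (UniversalDetector normally does this; the Cyrillic sample is illustrative):

    # Race all single-byte probers against some windows-1251 bytes.
    from chardet.sbcsgroupprober import SBCSGroupProber

    group = SBCSGroupProber()
    group.feed(('Привет, мир! ' * 20).encode('windows-1251'))
    print(group.charset_name, group.get_confidence())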
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import SJISDistributionAnalysis +from .jpcntx import SJISContextAnalysis +from .mbcssm import SJIS_SM_MODEL +from .enums import ProbingState, MachineState + + +class SJISProber(MultiByteCharSetProber): + def __init__(self): + super(SJISProber, self).__init__() + self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) + self.distribution_analyzer = SJISDistributionAnalysis() + self.context_analyzer = SJISContextAnalysis() + self.reset() + + def reset(self): + super(SJISProber, self).reset() + self.context_analyzer.reset() + + @property + def charset_name(self): + return self.context_analyzer.charset_name + + @property + def language(self): + return "Japanese" + + def feed(self, byte_str): + for i in range(len(byte_str)): + coding_state = self.coding_sm.next_state(byte_str[i]) + if coding_state == MachineState.ERROR: + self.logger.debug('%s %s prober hit error at byte %s', + self.charset_name, self.language, i) + self._state = ProbingState.NOT_ME + break + elif coding_state == MachineState.ITS_ME: + self._state = ProbingState.FOUND_IT + break + elif coding_state == MachineState.START: + char_len = self.coding_sm.get_current_charlen() + if i == 0: + self._last_char[1] = byte_str[0] + self.context_analyzer.feed(self._last_char[2 - char_len:], + char_len) + self.distribution_analyzer.feed(self._last_char, char_len) + else: + self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 + - char_len], char_len) + self.distribution_analyzer.feed(byte_str[i - 1:i + 1], + char_len) + + self._last_char[0] = byte_str[-1] + + if self.state == ProbingState.DETECTING: + if (self.context_analyzer.got_enough_data() and + (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + self._state = ProbingState.FOUND_IT + + return self.state + + def get_confidence(self): + context_conf = self.context_analyzer.get_confidence() + distrib_conf = self.distribution_analyzer.get_confidence() + return max(context_conf, distrib_conf) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/universaldetector.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/universaldetector.py new file mode 100644 index 0000000000000000000000000000000000000000..7b4e92d6158527736e3d14d6f725edada8f94b00 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/chardet/universaldetector.py @@ -0,0 +1,286 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
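SJISProber combines three signals: a coding state machine that validates Shift-JIS byte sequences, a character-distribution analyzer, and a kana-context analyzer; get_confidence simply reports the stronger of the last two. A minimal sketch (sample text illustrative):

    # The context analyzer also decides the reported name (SHIFT_JIS vs CP932).
    from chardet.sjisprober import SJISProber

    prober = SJISProber()
    prober.feed(('こんにちは世界。' * 20).encode('shift_jis'))
    print(prober.charset_name, prober.get_confidence())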
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### +""" +Module containing the UniversalDetector detector class, which is the primary +class a user of ``chardet`` should use. + +:author: Mark Pilgrim (initial port to Python) +:author: Shy Shalom (original C code) +:author: Dan Blanchard (major refactoring for 3.0) +:author: Ian Cordasco +""" + + +import codecs +import logging +import re + +from .charsetgroupprober import CharSetGroupProber +from .enums import InputState, LanguageFilter, ProbingState +from .escprober import EscCharSetProber +from .latin1prober import Latin1Prober +from .mbcsgroupprober import MBCSGroupProber +from .sbcsgroupprober import SBCSGroupProber + + +class UniversalDetector(object): + """ + The ``UniversalDetector`` class underlies the ``chardet.detect`` function + and coordinates all of the different charset probers. + + To get a ``dict`` containing an encoding and its confidence, you can simply + run: + + .. code:: + + u = UniversalDetector() + u.feed(some_bytes) + u.close() + detected = u.result + + """ + + MINIMUM_THRESHOLD = 0.20 + HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]') + ESC_DETECTOR = re.compile(b'(\033|~{)') + WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]') + ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252', + 'iso-8859-2': 'Windows-1250', + 'iso-8859-5': 'Windows-1251', + 'iso-8859-6': 'Windows-1256', + 'iso-8859-7': 'Windows-1253', + 'iso-8859-8': 'Windows-1255', + 'iso-8859-9': 'Windows-1254', + 'iso-8859-13': 'Windows-1257'} + + def __init__(self, lang_filter=LanguageFilter.ALL): + self._esc_charset_prober = None + self._charset_probers = [] + self.result = None + self.done = None + self._got_data = None + self._input_state = None + self._last_char = None + self.lang_filter = lang_filter + self.logger = logging.getLogger(__name__) + self._has_win_bytes = None + self.reset() + + def reset(self): + """ + Reset the UniversalDetector and all of its probers back to their + initial states. This is called by ``__init__``, so you only need to + call this directly in between analyses of different documents. + """ + self.result = {'encoding': None, 'confidence': 0.0, 'language': None} + self.done = False + self._got_data = False + self._has_win_bytes = False + self._input_state = InputState.PURE_ASCII + self._last_char = b'' + if self._esc_charset_prober: + self._esc_charset_prober.reset() + for prober in self._charset_probers: + prober.reset() + + def feed(self, byte_str): + """ + Takes a chunk of a document and feeds it through all of the relevant + charset probers. + + After calling ``feed``, you can check the value of the ``done`` + attribute to see if you need to continue feeding the + ``UniversalDetector`` more data, or if it has made a prediction + (in the ``result`` attribute). + + .. note:: + You should always call ``close`` when you're done feeding in your + document if ``done`` is not already ``True``. 
+ """ + if self.done: + return + + if not len(byte_str): + return + + if not isinstance(byte_str, bytearray): + byte_str = bytearray(byte_str) + + # First check for known BOMs, since these are guaranteed to be correct + if not self._got_data: + # If the data starts with BOM, we know it is UTF + if byte_str.startswith(codecs.BOM_UTF8): + # EF BB BF UTF-8 with BOM + self.result = {'encoding': "UTF-8-SIG", + 'confidence': 1.0, + 'language': ''} + elif byte_str.startswith((codecs.BOM_UTF32_LE, + codecs.BOM_UTF32_BE)): + # FF FE 00 00 UTF-32, little-endian BOM + # 00 00 FE FF UTF-32, big-endian BOM + self.result = {'encoding': "UTF-32", + 'confidence': 1.0, + 'language': ''} + elif byte_str.startswith(b'\xFE\xFF\x00\x00'): + # FE FF 00 00 UCS-4, unusual octet order BOM (3412) + self.result = {'encoding': "X-ISO-10646-UCS-4-3412", + 'confidence': 1.0, + 'language': ''} + elif byte_str.startswith(b'\x00\x00\xFF\xFE'): + # 00 00 FF FE UCS-4, unusual octet order BOM (2143) + self.result = {'encoding': "X-ISO-10646-UCS-4-2143", + 'confidence': 1.0, + 'language': ''} + elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)): + # FF FE UTF-16, little endian BOM + # FE FF UTF-16, big endian BOM + self.result = {'encoding': "UTF-16", + 'confidence': 1.0, + 'language': ''} + + self._got_data = True + if self.result['encoding'] is not None: + self.done = True + return + + # If none of those matched and we've only see ASCII so far, check + # for high bytes and escape sequences + if self._input_state == InputState.PURE_ASCII: + if self.HIGH_BYTE_DETECTOR.search(byte_str): + self._input_state = InputState.HIGH_BYTE + elif self._input_state == InputState.PURE_ASCII and \ + self.ESC_DETECTOR.search(self._last_char + byte_str): + self._input_state = InputState.ESC_ASCII + + self._last_char = byte_str[-1:] + + # If we've seen escape sequences, use the EscCharSetProber, which + # uses a simple state machine to check for known escape sequences in + # HZ and ISO-2022 encodings, since those are the only encodings that + # use such sequences. + if self._input_state == InputState.ESC_ASCII: + if not self._esc_charset_prober: + self._esc_charset_prober = EscCharSetProber(self.lang_filter) + if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT: + self.result = {'encoding': + self._esc_charset_prober.charset_name, + 'confidence': + self._esc_charset_prober.get_confidence(), + 'language': + self._esc_charset_prober.language} + self.done = True + # If we've seen high bytes (i.e., those with values greater than 127), + # we need to do more complicated checks using all our multi-byte and + # single-byte probers that are left. The single-byte probers + # use character bigram distributions to determine the encoding, whereas + # the multi-byte probers use a combination of character unigram and + # bigram distributions. 
+ elif self._input_state == InputState.HIGH_BYTE: + if not self._charset_probers: + self._charset_probers = [MBCSGroupProber(self.lang_filter)] + # If we're checking non-CJK encodings, use single-byte prober + if self.lang_filter & LanguageFilter.NON_CJK: + self._charset_probers.append(SBCSGroupProber()) + self._charset_probers.append(Latin1Prober()) + for prober in self._charset_probers: + if prober.feed(byte_str) == ProbingState.FOUND_IT: + self.result = {'encoding': prober.charset_name, + 'confidence': prober.get_confidence(), + 'language': prober.language} + self.done = True + break + if self.WIN_BYTE_DETECTOR.search(byte_str): + self._has_win_bytes = True + + def close(self): + """ + Stop analyzing the current document and come up with a final + prediction. + + :returns: The ``result`` attribute, a ``dict`` with the keys + `encoding`, `confidence`, and `language`. + """ + # Don't bother with checks if we're already done + if self.done: + return self.result + self.done = True + + if not self._got_data: + self.logger.debug('no data received!') + + # Default to ASCII if it is all we've seen so far + elif self._input_state == InputState.PURE_ASCII: + self.result = {'encoding': 'ascii', + 'confidence': 1.0, + 'language': ''} + + # If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD + elif self._input_state == InputState.HIGH_BYTE: + prober_confidence = None + max_prober_confidence = 0.0 + max_prober = None + for prober in self._charset_probers: + if not prober: + continue + prober_confidence = prober.get_confidence() + if prober_confidence > max_prober_confidence: + max_prober_confidence = prober_confidence + max_prober = prober + if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD): + charset_name = max_prober.charset_name + lower_charset_name = max_prober.charset_name.lower() + confidence = max_prober.get_confidence() + # Use Windows encoding name instead of ISO-8859 if we saw any + # extra Windows-specific bytes + if lower_charset_name.startswith('iso-8859'): + if self._has_win_bytes: + charset_name = self.ISO_WIN_MAP.get(lower_charset_name, + charset_name) + self.result = {'encoding': charset_name, + 'confidence': confidence, + 'language': max_prober.language} + + # Log all prober confidences if none met MINIMUM_THRESHOLD + if self.logger.getEffectiveLevel() == logging.DEBUG: + if self.result['encoding'] is None: + self.logger.debug('no probers hit minimum threshold') + for group_prober in self._charset_probers: + if not group_prober: + continue + if isinstance(group_prober, CharSetGroupProber): + for prober in group_prober.probers: + self.logger.debug('%s %s confidence = %s', + prober.charset_name, + prober.language, + prober.get_confidence()) + else: + self.logger.debug('%s %s confidence = %s', + prober.charset_name, + prober.language, + prober.get_confidence()) + return self.result diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a124bf6486bf3d8e9e8221738e3dcef97b14ece --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/__init__.py @@ -0,0 +1 @@ +__version__ = "2.2.0" diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/cli.py 
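The reset/feed/close cycle above is what makes chardet usable on large inputs: feed chunks until done flips to True, then call close() for the final verdict, and reset() to reuse the detector. The standard incremental pattern (file name is illustrative):

    # Incremental detection; stops reading as soon as the detector is confident.
    from chardet.universaldetector import UniversalDetector

    detector = UniversalDetector()
    with open('some_file.txt', 'rb') as fh:  # hypothetical input file
        for chunk in iter(lambda: fh.read(4096), b''):
            detector.feed(chunk)
            if detector.done:
                break
    detector.close()
    print(detector.result)  # {'encoding': ..., 'confidence': ..., 'language': ...}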
b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..e5cff27878114cfd50192a97d2efdc7bed7228a0 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/cli.py @@ -0,0 +1,141 @@ +import argparse +import os +import sys +from pprint import pprint + +from . import __version__, api + + +def parse_args(parse_this=None): + parser = build_parser() + return parser.parse_args(parse_this) + + +def build_parser(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-V", + "--version", + action="version", + help="Show the conda-package-handling version number and exit.", + version=f"conda-package-handling {__version__}", + ) + sp = parser.add_subparsers(title="subcommands", dest="subcommand", required=True) + + extract_parser = sp.add_parser("extract", help="extract package contents", aliases=["x"]) + extract_parser.add_argument("archive_path", help="path to archive to extract") + extract_parser.add_argument( + "--dest", + help="destination folder to extract to. If not set, defaults to" + " package filename minus extension in the same folder as the input archive." + " May be relative path used in tandem with the --prefix flag.", + ) + extract_parser.add_argument( + "--prefix", + help="base directory to extract to. Use this to set the base" + " directory, while allowing the folder name to be automatically determined " + "by the input filename. An abspath --prefix with an unset --dest will " + "achieve this.", + ) + extract_parser.add_argument( + "--info", + help="If the archive supports separate metadata, this" + " flag extracts only the metadata in the info folder from the " + "package. If the archive does not support separate metadata, this " + "flag has no effect and all files are extracted.", + action="store_true", + ) + + create_parser = sp.add_parser("create", help="bundle files into a package", aliases=["c"]) + create_parser.add_argument( + "prefix", + help="folder of files to bundle. Not strictly required to" + " have conda package metadata, but if conda package metadata isn't " + "present, you'll see a warning and your file will not work as a " + "conda package", + ) + create_parser.add_argument( + "out_fn", + help="Filename of archive to be created. Extension determines package type.", + ) + create_parser.add_argument( + "--file-list", + help="Path to file containing one relative path per" + " line that should be included in the archive. If not provided, " + "lists all files in the prefix.", + ) + create_parser.add_argument("--out-folder", help="Folder to dump final archive to") + + convert_parser = sp.add_parser( + "transmute", help="convert from one package type to another", aliases=["t"] + ) + convert_parser.add_argument( + "in_file", help="existing file to convert from. Glob patterns accepted." + ) + convert_parser.add_argument( + "out_ext", + help="extension of file to convert to. Examples: .tar.bz2, .conda", + ) + convert_parser.add_argument("--out-folder", help="Folder to dump final archive to") + convert_parser.add_argument( + "--force", action="store_true", help="Force overwrite existing package" + ) + convert_parser.add_argument( + "--processes", + type=int, + help="Max number of processes to use. If not set, defaults to 1.", + ) + convert_parser.add_argument( + "--zstd-compression-level", + help=( + "When building v2 packages, set the compression level used by " + "conda-package-handling. 
Defaults to 19." + ), + type=int, + choices=range(1, 23), + default=19, + ) + convert_parser.add_argument( + "--zstd-compression-threads", + help=( + "When building v2 packages, set the compression threads used by " + "conda-package-handling. Defaults to 1. -1=automatic." + ), + type=int, + default=1, + ) + + return parser + + +def main(args=None): + args = parse_args(args) + if hasattr(args, "out_folder") and args.out_folder: + args.out_folder = ( + os.path.abspath(os.path.normpath(os.path.expanduser(args.out_folder))) + os.sep + ) + if args.subcommand in ("extract", "x"): + if args.info: + api.extract(args.archive_path, args.dest, components="info", prefix=args.prefix) + else: + api.extract(args.archive_path, args.dest, prefix=args.prefix) + elif args.subcommand in ("create", "c"): + api.create(args.prefix, args.file_list, args.out_fn, args.out_folder) + elif args.subcommand in ("transmute", "t"): + failed_files = api.transmute( + args.in_file, + args.out_ext, + args.out_folder, + args.processes or 1, + force=args.force, + zstd_compress_level=args.zstd_compression_level, + zstd_compress_threads=args.zstd_compression_threads, + ) + if failed_files: + print("failed files:") + pprint(failed_files) + sys.exit(1) + + +if __name__ == "__main__": # pragma: no cover + main(args=None) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/conda_fmt.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/conda_fmt.py new file mode 100644 index 0000000000000000000000000000000000000000..7be1ac1429d8fc739316b5ea7904ecb9dc5ed035 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/conda_fmt.py @@ -0,0 +1,144 @@ +""" +The 'new' conda format, introduced in late 2018/early 2019. + +https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/packages.html +""" + +from __future__ import annotations + +import json +import os +import tarfile +from typing import Callable +from zipfile import ZIP_STORED, ZipFile + +import zstandard + +from . 
import utils +from .interface import AbstractBaseFormat +from .streaming import _extract + +CONDA_PACKAGE_FORMAT_VERSION = 2 +DEFAULT_COMPRESSION_TUPLE = (".tar.zst", "zstd", "zstd:compression-level=19") + +# increase to reduce speed and increase compression (22 = conda's default) +ZSTD_COMPRESS_LEVEL = 19 +# increase to reduce compression (slightly) and increase speed +ZSTD_COMPRESS_THREADS = 1 + + +class CondaFormat_v2(AbstractBaseFormat): + """If there's another conda format or breaking changes, please create a new class and keep this + one, so that handling of v2 stays working.""" + + @staticmethod + def supported(fn): + return fn.endswith(".conda") + + @staticmethod + def extract(fn, dest_dir, **kw): + components = utils.ensure_list(kw.get("components")) or ("info", "pkg") + if not os.path.isabs(fn): + fn = os.path.normpath(os.path.join(os.getcwd(), fn)) + if not os.path.isdir(dest_dir): + os.makedirs(dest_dir) + + _extract(str(fn), str(dest_dir), components=components) + + @staticmethod + def extract_info(fn, dest_dir=None): + return CondaFormat_v2.extract(fn, dest_dir, components=["info"]) + + @staticmethod + def create( + prefix, + file_list, + out_fn, + out_folder=os.getcwd(), + compressor: Callable[[], zstandard.ZstdCompressor] | None = None, + compression_tuple=(None, None, None), + ): + if os.path.isabs(out_fn): + out_folder = os.path.dirname(out_fn) + out_fn = os.path.basename(out_fn) + conda_pkg_fn = os.path.join(out_folder, out_fn) + file_id = out_fn.replace(".conda", "") + pkg_files = utils.filter_info_files(file_list, prefix) + # preserve order + pkg_files_set = set(pkg_files) + info_files = list(f for f in file_list if f not in pkg_files_set) + + if compressor and (compression_tuple != (None, None, None)): + raise ValueError("Supply one of compressor= or (deprecated) compression_tuple=") + + if compressor is None: + compressor = lambda: zstandard.ZstdCompressor( + level=ZSTD_COMPRESS_LEVEL, + threads=ZSTD_COMPRESS_THREADS, + ) + + # legacy libarchive-ish compatibility + ext, comp_filter, filter_opts = compression_tuple + if filter_opts and filter_opts.startswith("zstd:compression-level="): + compressor = lambda: zstandard.ZstdCompressor( + level=int(filter_opts.split("=", 1)[-1]), + threads=ZSTD_COMPRESS_THREADS, + ) + + class NullWriter: + """ + zstd uses less memory on extract if size is known. + """ + + def __init__(self): + self.size = 0 + + def write(self, bytes): + self.size += len(bytes) + return len(bytes) + + def tell(self): + return self.size + + with ZipFile(conda_pkg_fn, "w", compression=ZIP_STORED) as conda_file, utils.tmp_chdir( + prefix + ): + pkg_metadata = {"conda_pkg_format_version": CONDA_PACKAGE_FORMAT_VERSION} + conda_file.writestr("metadata.json", json.dumps(pkg_metadata)) + + components_files = (f"pkg-{file_id}.tar.zst", pkg_files), ( + f"info-{file_id}.tar.zst", + info_files, + ) + + # put the info last, for parity with updated transmute. + compress = compressor() + for component, files in components_files: + # If size is known, the decompressor may be able to allocate less memory. + # The compressor will error if size is not correct. 
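        # Two-pass write: the first TarFile pass streams into NullWriter purely
        # to measure the exact uncompressed size; the second pass re-adds the
        # same files into the zstd stream_writer opened with that size, so the
        # zstd frame header can record the content size up front.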
+ with tarfile.TarFile(fileobj=NullWriter(), mode="w") as sizer: # type: ignore + for file in files: + sizer.add(file, filter=utils.anonymize_tarinfo) + size = sizer.fileobj.size # type: ignore + + with conda_file.open(component, "w") as component_file: + # only one stream_writer() per compressor() must be in use at a time + component_stream = compress.stream_writer( + component_file, size=size, closefd=False + ) + component_tar = tarfile.TarFile(fileobj=component_stream, mode="w") + + for file in files: + component_tar.add(file, filter=utils.anonymize_tarinfo) + + component_tar.close() + component_stream.close() + + return conda_pkg_fn + + @staticmethod + def get_pkg_details(in_file): + stat_result = os.stat(in_file) + size = stat_result.st_size + md5, sha256 = utils.checksums(in_file, ("md5", "sha256")) + return {"size": size, "md5": md5, "sha256": sha256} diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/interface.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/interface.py new file mode 100644 index 0000000000000000000000000000000000000000..b5668c2f50ea85b387db8f4e441b625c358eade8 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/interface.py @@ -0,0 +1,24 @@ +import abc +import os + + +class AbstractBaseFormat(metaclass=abc.ABCMeta): + @staticmethod + @abc.abstractmethod + def supported(fn): # pragma: no cover + return False + + @staticmethod + @abc.abstractmethod + def extract(fn, dest_dir, **kw): # pragma: no cover + raise NotImplementedError + + @staticmethod + @abc.abstractmethod + def create(prefix, file_list, out_fn, out_folder=os.getcwd(), **kw): # pragma: no cover + raise NotImplementedError + + @staticmethod + @abc.abstractmethod + def get_pkg_details(in_file): # pragma: no cover + raise NotImplementedError diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/streaming.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/streaming.py new file mode 100644 index 0000000000000000000000000000000000000000..fde2ef5bad13a446e1b0be0ff343dc9eb1cbf509 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/streaming.py @@ -0,0 +1,42 @@ +""" +Exception-compatible adapter from conda_package_streaming. +""" + +from __future__ import annotations + +from tarfile import TarError +from zipfile import BadZipFile + +from conda_package_streaming.extract import exceptions as cps_exceptions +from conda_package_streaming.extract import extract_stream, package_streaming + +from . import exceptions + + +def _extract(filename: str, dest_dir: str, components: list[str]): + """ + Extract .conda or .tar.bz2 package to dest_dir. + + If it's a conda package, components may be ["pkg", "info"] + + If it's a .tar.bz2 package, components must equal ["pkg"] + + Internal. Skip directly to conda-package-streaming if you don't need + exception compatibility. 
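+
+    A hypothetical call (the archive name is illustrative):
+
+        _extract("example-1.0-0.conda", "outdir", components=["info", "pkg"])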
+ """ + + if str(filename).endswith(".tar.bz2"): + assert components == ["pkg"] + + try: + with open(filename, "rb") as fileobj: + for component in components: + # will parse zipfile twice + stream = package_streaming.stream_conda_component( + filename, fileobj, component=component + ) + extract_stream(stream, dest_dir) + except cps_exceptions.CaseInsensitiveFileSystemError as e: + raise exceptions.CaseInsensitiveFileSystemError(filename, dest_dir) from e + except (OSError, TarError, BadZipFile) as e: + raise exceptions.InvalidArchiveError(filename, f"failed with error: {str(e)}") from e diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/utils.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8b3aa9aeb267455bc993057bec369c74cf807a96 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/utils.py @@ -0,0 +1,493 @@ +import contextlib +import fnmatch +import hashlib +import logging +import os +import re +import shutil +import sys +import warnings as _warnings +from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor +from errno import EACCES, ENOENT, EPERM, EROFS +from itertools import chain +from os.path import ( + abspath, + basename, + dirname, + isdir, + isfile, + islink, + join, + lexists, + normpath, + split, +) +from stat import S_IEXEC, S_IMODE, S_ISDIR, S_ISREG, S_IWRITE +from subprocess import STDOUT, CalledProcessError, check_output, list2cmdline +from tempfile import NamedTemporaryFile, mkdtemp + +on_win = sys.platform == "win32" +log = logging.getLogger(__file__) +CONDA_TEMP_EXTENSION = ".c~" + + +def which(executable): + from distutils.spawn import find_executable + + return find_executable(executable) + + +def make_writable(path): + try: + mode = os.lstat(path).st_mode + if S_ISDIR(mode): + os.chmod(path, S_IMODE(mode) | S_IWRITE | S_IEXEC) + elif S_ISREG(mode) or islink(path): + os.chmod(path, S_IMODE(mode) | S_IWRITE) + else: + log.debug("path cannot be made writable: %s", path) + return True + except Exception as e: + eno = getattr(e, "errno", None) + if eno in (ENOENT,): + log.debug("tried to make writable, but didn't exist: %s", path) + raise + elif eno in (EACCES, EPERM, EROFS): + log.debug("tried make writable but failed: %s\n%r", path, e) + return False + else: + log.warn("Error making path writable: %s\n%r", path, e) + raise + + +class DummyExecutor(Executor): + def map(self, func, *iterables): + for iterable in iterables: + for thing in iterable: + yield func(thing) + + +def get_executor(processes): + return DummyExecutor() if processes == 1 else ProcessPoolExecutor(max_workers=processes) + + +def recursive_make_writable(path): + # The need for this function was pointed out at + # https://github.com/conda/conda/issues/3266#issuecomment-239241915 + # Especially on windows, file removal will often fail because it is marked read-only + if isdir(path): + for root, dirs, files in os.walk(path): + for path in chain.from_iterable((files, dirs)): + try: + make_writable(join(root, path)) + except: + pass + else: + try: + make_writable(path) + except: + pass + + +def quote_for_shell(arguments, shell=None): + if not shell: + shell = "cmd.exe" if on_win else "bash" + if shell == "cmd.exe": + return list2cmdline(arguments) + else: + # If any multiline argument gets mixed with any other argument (which is true if we've + # 
arrived in this function) then we just quote it. This assumes something like:
+        #     ['python', '-c', 'a\nmultiline\nprogram\n']
+        # It may make sense to allow specifying a replacement character for '\n' too? e.g. ';'
+        quoted = []
+        # This could all be replaced with some regex wizardry but that is less readable and
+        # for code like this, readability is very important.
+        for arg in arguments:
+            if '"' in arg:
+                quote = "'"
+            elif "'" in arg:
+                quote = '"'
+            elif not any(_ in arg for _ in (" ", "\n")):
+                quote = ""
+            else:
+                quote = '"'
+            quoted.append(quote + arg + quote)
+        return " ".join(quoted)
+
+
+def rmtree(path, *args, **kwargs):
+    # subprocessing to delete large folders can be quite a bit faster
+    path = normpath(path)
+    if on_win:
+        try:
+            # the fastest way seems to be using DEL to recursively delete files
+            # https://www.ghacks.net/2017/07/18/how-to-delete-large-folders-in-windows-super-fast/
+            # However, this is not entirely safe, as it can end up following symlinks to folders
+            # https://superuser.com/a/306618/184799
+            # so, we stick with the slower, but hopefully safer way. Maybe if we figured out how
+            # to scan for any possible symlinks, we could do the faster way.
+            # out = check_output('DEL /F/Q/S *.* > NUL 2> NUL'.format(path), shell=True,
+            #                    stderr=STDOUT, cwd=path)
+
+            out = check_output(f'RD /S /Q "{path}" > NUL 2> NUL', shell=True, stderr=STDOUT)
+        except:
+            try:
+                # Try to delete in Unicode
+                name = None
+                out = b""  # bind 'out' so the except clause below can log it safely
+
+                # mode="w" so str can be written; delete=False so cmd.exe can
+                # reopen the closed file by name
+                with NamedTemporaryFile(mode="w", suffix=".bat", delete=False) as batch_file:
+                    batch_file.write(f"RD /S {quote_for_shell([path])}\n")
+                    batch_file.write("chcp 65001\n")
+                    batch_file.write(f"RD /S {quote_for_shell([path])}\n")
+                    batch_file.write("EXIT 0\n")
+                    name = batch_file.name
+                # If the above is bugged we can end up deleting hard-drives, so we check
+                # that 'path' appears in it. This is not bulletproof but it could save you (me).
+                with open(name) as contents:
+                    content = contents.read()
+                    assert path in content
+                comspec = os.environ["COMSPEC"]
+                CREATE_NO_WINDOW = 0x08000000
+                # It is essential that we pass `stdout=None, stderr=None, stdin=None` here because
+                # if we do not, then the standard console handles get attached and chcp affects the
+                # parent process (and any which share those console handles!)
+                out = check_output(
+                    [comspec, "/d", "/c", name],
+                    shell=False,
+                    stdout=None,
+                    stderr=None,
+                    stdin=None,
+                    creationflags=CREATE_NO_WINDOW,
+                )
+
+            except CalledProcessError as e:
+                if e.returncode != 5:
+                    log.error(f"Removing folder {path} the fast way failed. Output was: {out}")
+                    raise
+                else:
+                    log.debug(f"removing dir contents the fast way failed. Output was: {out}")
+    else:
+        try:
+            os.makedirs(".empty")
+        except:
+            pass
+        # yes, this looks strange. See
+        # https://unix.stackexchange.com/a/79656/34459
+        # https://web.archive.org/web/20130929001850/http://linuxnote.net/jianingy/en/linux/a-fast-way-to-remove-huge-number-of-files.html  # NOQA
+        rsync = which("rsync")
+        if rsync and isdir(".empty"):
+            out = b""  # bind 'out' so the except clause below can log it safely
+            try:
+                out = check_output(
+                    [
+                        rsync,
+                        "-a",
+                        "--force",
+                        "--delete",
+                        join(os.getcwd(), ".empty") + "/",
+                        path + "/",
+                    ],
+                    stderr=STDOUT,
+                )
+            except CalledProcessError:
+                log.debug(f"removing dir contents the fast way failed. Output was: {out}")
+            shutil.rmtree(".empty")
+    shutil.rmtree(path)
+
+
+def unlink_or_rename_to_trash(path):
+    """If files are in use, especially on windows, we can't remove them.
+    The fallback path is to rename them (but keep their folder the same),
+    which maintains the file handle validity.
See comments at:
+    https://serverfault.com/a/503769
+    """
+    try:
+        make_writable(path)
+        os.unlink(path)
+    except OSError:
+        try:
+            os.rename(path, path + ".conda_trash")
+        except OSError:
+            if on_win:
+                # on windows, it is important to use the rename program, as just using python's
+                # rename leads to permission errors when files are in use.
+                # Write the batch script in text mode with delete=False and let the
+                # with-block close it, so cmd.exe can open it by name afterwards.
+                with NamedTemporaryFile(mode="w", suffix=".bat", delete=False) as trash_script:
+                    trash_script.write('@pushd "%1"\n')
+                    trash_script.write("@REM Rename src to dest\n")
+                    trash_script.write('@ren "%2" "%3" > NUL 2> NUL\n')
+
+                _dirname, _fn = split(path)
+                dest_fn = path + ".conda_trash"
+                counter = 1
+                while isfile(dest_fn):
+                    # strip the old extension before appending the numbered suffix
+                    dest_fn = os.path.splitext(dest_fn)[0] + f".conda_trash_{counter}"
+                    counter += 1
+                out = "< empty >"
+                try:
+                    out = check_output(
+                        [
+                            "cmd.exe",
+                            "/C",
+                            trash_script.name,
+                            _dirname,
+                            _fn,
+                            basename(dest_fn),
+                        ],
+                        stderr=STDOUT,
+                    )
+                except CalledProcessError:
+                    log.warn(
+                        "renaming file path {} to trash failed. Output was: {}".format(
+                            path, out
+                        )
+                    )
+
+            log.warn(
+                "Could not remove or rename {}. Please remove this file manually (you "
+                "may need to reboot to free file handles)".format(path)
+            )
+
+
+def remove_empty_parent_paths(path):
+    # recurse to clean up empty folders that were created to have a nested hierarchy
+    parent_path = dirname(path)
+    while isdir(parent_path) and not os.listdir(parent_path):
+        rmdir(parent_path)
+        parent_path = dirname(parent_path)
+
+
+def rm_rf(path, clean_empty_parents=False, *args, **kw):
+    """
+    Completely delete path
+    max_retries is the number of times to retry on failure. The default is 5. This only applies
+    to deleting a directory.
+    If removing path fails and trash is True, files will be moved to the trash directory.
+    """
+    recursive_make_writable(path)
+    try:
+        path = abspath(path)
+        if isdir(path) and not islink(path):
+            rmdir(path)
+        elif lexists(path):
+            unlink_or_rename_to_trash(path)
+        else:
+            log.debug("rm_rf failed. Not a link, file, or directory: %s", path)
+    finally:
+        if lexists(path):
+            log.info("rm_rf failed for %s", path)
+            return False
+    if isdir(path):
+        delete_trash(path)
+    if clean_empty_parents:
+        remove_empty_parent_paths(path)
+    return True
+
+
+# aliases that all do the same thing (legacy compat)
+try_rmdir_all_empty = move_to_trash = move_path_to_trash = rm_rf
+
+
+def delete_trash(prefix):
+    if not prefix:
+        prefix = sys.prefix
+    exclude = {"envs"}
+    for root, dirs, files in os.walk(prefix, topdown=True):
+        dirs[:] = [d for d in dirs if d not in exclude]
+        for fn in files:
+            if fnmatch.fnmatch(fn, "*.conda_trash*") or fnmatch.fnmatch(
+                fn, "*" + CONDA_TEMP_EXTENSION
+            ):
+                filename = join(root, fn)
+                try:
+                    os.unlink(filename)
+                    remove_empty_parent_paths(filename)
+                except OSError as e:
+                    log.debug("%r errno %d\nCannot unlink %s.", e, e.errno, filename)
+
+
+def rmdir(dirpath):
+    if not isdir(dirpath):
+        return
+    try:
+        rmtree(dirpath)
+    # we don't really care about errors that much. We'll catch remaining files
+    # with slower python logic.
+    except:
+        pass
+
+    for root, dirs, files in os.walk(dirpath, topdown=False):
+        for f in files:
+            unlink_or_rename_to_trash(join(root, f))
+
+
+# we have our own TemporaryDirectory class because it's faster and handles disk issues better.
+class TemporaryDirectory:
+    """Create and return a temporary directory. This has the same
+    behavior as mkdtemp but can be used as a context manager. For
+    example:
+
+    with TemporaryDirectory() as tmpdir:
+        ...
+
+    Upon exiting the context, the directory and everything contained
+    in it are removed.
+ """ + + # Handle mkdtemp raising an exception + name = None + _closed = False + + def __init__(self, suffix="", prefix=".cph_tmp", dir=os.getcwd()): + self.name = mkdtemp(suffix, prefix, dir) + + def __repr__(self): + return f"<{self.__class__.__name__} {self.name!r}>" + + def __enter__(self): + return self.name + + def cleanup(self, _warn=False, _warnings=_warnings): + if self.name and not self._closed: + try: + rm_rf(self.name) + except: + _warnings.warn( + 'Conda-package-handling says: "I tried to clean up, ' + "but I could not. There is a mess in %s that you might " + 'want to clean up yourself. Sorry..."' % self.name + ) + self._closed = True + if _warn and _warnings.warn: + _warnings.warn( + f"Implicitly cleaning up {self!r}", + _warnings.ResourceWarning, + ) + + def __exit__(self, exc, value, tb): + self.cleanup() + + def __del__(self): + # Issue a ResourceWarning if implicit cleanup needed + self.cleanup(_warn=True) + + +@contextlib.contextmanager +def tmp_chdir(dest): + curdir = os.getcwd() + try: + os.chdir(dest) + yield + finally: + os.chdir(curdir) + + +def ensure_list(arg): + if isinstance(arg, str) or not hasattr(arg, "__iter__"): + if arg is not None: + arg = [arg] + else: + arg = [] + return arg + + +def filter_files( + files_list, + prefix, + filter_patterns=( + r"(.*[\\\\/])?\.git[\\\\/].*", + r"(.*[\\\\/])?\.git$", + r"(.*)?\.DS_Store.*", + r".*\.la$", + r"conda-meta.*", + ), +): + """Remove things like the .git directory from the list of files to be copied""" + for pattern in filter_patterns: + r = re.compile(pattern) + files_list = set(files_list) - set(filter(r.match, files_list)) + return [ + f + for f in files_list + if + # `islink` prevents symlinks to directories from being removed + os.path.islink(os.path.join(prefix, f)) or not os.path.isdir(os.path.join(prefix, f)) + ] + + +def filter_info_files(files_list, prefix): + return filter_files( + files_list, + prefix, + filter_patterns=( + "info[\\\\/]index\\.json", + "info[\\\\/]files", + "info[\\\\/]paths\\.json", + "info[\\\\/]about\\.json", + "info[\\\\/]has_prefix", + "info[\\\\/]hash_input_files", # legacy, not used anymore + "info[\\\\/]hash_input\\.json", + "info[\\\\/]run_exports\\.yaml", # legacy + "info[\\\\/]run_exports\\.json", # current + "info[\\\\/]git", + "info[\\\\/]recipe[\\\\/].*", + "info[\\\\/]recipe_log.json", + "info[\\\\/]recipe.tar", + "info[\\\\/]test[\\\\/].*", + "info[\\\\/]LICENSE.*", + "info[\\\\/]requires", + "info[\\\\/]meta", + "info[\\\\/]platform", + "info[\\\\/]no_link", + "info[\\\\/]link\\.json", + "info[\\\\/]icon\\.png", + ), + ) + + +def _checksum(fd, algorithm, buffersize=65536): + hash_impl = getattr(hashlib, algorithm) + if not hash_impl: + raise ValueError(f"Unrecognized hash algorithm: {algorithm}") + else: + hash_impl = hash_impl() + for block in iter(lambda: fd.read(buffersize), b""): + hash_impl.update(block) + return hash_impl.hexdigest() + + +def sha256_checksum(fd): + return _checksum(fd, "sha256") + + +def md5_checksum(fd): + return _checksum(fd, "md5") + + +def checksum(fn, algorithm, buffersize=1 << 18): + """ + Calculate a checksum for a filename (not an open file). + """ + with open(fn, "rb") as fd: + return _checksum(fd, algorithm, buffersize) + + +def checksums(fn, algorithms, buffersize=1 << 18): + """ + Calculate multiple checksums for a filename in parallel. 
+ """ + with ThreadPoolExecutor(max_workers=len(algorithms)) as e: + # take care not to share hash_impl between threads + results = [e.submit(checksum, fn, algorithm, buffersize) for algorithm in algorithms] + return [result.result() for result in results] + + +def anonymize_tarinfo(tarinfo): + """ + Remove user id, name from tarinfo. + """ + # also remove timestamps? + tarinfo.uid = 0 + tarinfo.uname = "" + tarinfo.gid = 0 + tarinfo.gname = "" + return tarinfo diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/validate.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/validate.py new file mode 100644 index 0000000000000000000000000000000000000000..024d71af293153b500a273fe2f45ba641eb48945 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/conda_package_handling/validate.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import hashlib +import os +from itertools import chain +from pathlib import Path + +from conda_package_streaming import package_streaming + +from .utils import TemporaryDirectory + + +def validate_converted_files_match( + src_file_or_folder, subject, reference_ext="" +): # pragma: nocover + # No longer used by conda-package-handling + from .api import extract + + with TemporaryDirectory() as tmpdir: + assert tmpdir is not None + if os.path.isdir(src_file_or_folder): + src_folder = src_file_or_folder + else: + extract(src_file_or_folder + reference_ext, dest_dir=os.path.join(tmpdir, "src")) + src_folder = os.path.join(tmpdir, "src") + + converted_folder = os.path.join(tmpdir, "converted") + extract(subject, dest_dir=converted_folder) + + missing_files = set() + mismatch_size = set() + for root, dirs, files in os.walk(src_folder): + for f in files: + absfile = os.path.join(root, f) + rp = os.path.relpath(absfile, src_folder) + destpath = os.path.join(converted_folder, rp) + if not os.path.islink(destpath): + if not os.path.isfile(destpath): + missing_files.add(rp) + elif os.stat(absfile).st_size != os.stat(destpath).st_size: + mismatch_size.add(rp) + return src_file_or_folder, missing_files, mismatch_size + + +def hash_fn(): + return hashlib.blake2b() + + +IGNORE_FIELDS = { + "uid", + "gid", + "mtime", + "uname", + "gname", + "chksum", +} #: ignore if not strict + + +def validate_converted_files_match_streaming( + src: str | Path, reference: str | Path, *, strict=True +): + """ + Check that two .tar.bz2 or .conda files (either of src_file and + reference_file can be either format) match exactly, down to the timestamps + etc. + + Does not check outside of the info- and pkg- components of a .conda. + (conda's metadata.json, which gives the version "2" of the format) + + If strict = True, also check for matching uid, gid, mtime, uname, gname. 
+ """ + source_set = {} + reference_set = {} + ignore_fields = {"chksum"} if strict else IGNORE_FIELDS + + def get_fileset(filename: str | Path): + fileset = {} + components = ["info", "pkg"] if os.fspath(filename).endswith(".conda") else ["pkg"] + with open(filename, "rb") as conda_file: + for component in components: + for tar, member in package_streaming.stream_conda_component( + filename, conda_file, component + ): + info = {k: v for k, v in member.get_info().items() if k not in ignore_fields} + + if member.isfile(): + hasher = hash_fn() + fd = tar.extractfile(member) + assert fd is not None + for block in iter(lambda: fd.read(1 << 18), b""): # type: ignore + hasher.update(block) + + info["digest"] = hasher.hexdigest() + + fileset[info["name"]] = info + + return fileset + + source_set = get_fileset(src) + reference_set = get_fileset(reference) + + missing = [] + mismatched = [] + + if source_set != reference_set: + for file in chain(source_set, reference_set): + if not (file in source_set and file in reference_set): + missing.append(file) + elif source_set[file] != reference_set[file]: + mismatched.append(file) + + return src, missing, mismatched diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/distutils-precedence.pth b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/distutils-precedence.pth new file mode 100644 index 0000000000000000000000000000000000000000..c659194195f07bd6f19b5522515551309af14a3d --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/distutils-precedence.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2638ce9e2500e572a5e0de7faed6661eb569d1b696fcba07b0dd223da5f5d224 +size 151 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/future/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/future/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad419d67e2740d0c669286c3b5f5177c84b89795 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/future/__init__.py @@ -0,0 +1,93 @@ +""" +future: Easy, safe support for Python 2/3 compatibility +======================================================= + +``future`` is the missing compatibility layer between Python 2 and Python +3. It allows you to use a single, clean Python 3.x-compatible codebase to +support both Python 2 and Python 3 with minimal overhead. + +It is designed to be used as follows:: + + from __future__ import (absolute_import, division, + print_function, unicode_literals) + from builtins import ( + bytes, dict, int, list, object, range, str, + ascii, chr, hex, input, next, oct, open, + pow, round, super, + filter, map, zip) + +followed by predominantly standard, idiomatic Python 3 code that then runs +similarly on Python 2.6/2.7 and Python 3.3+. + +The imports have no effect on Python 3. On Python 2, they shadow the +corresponding builtins, which normally have different semantics on Python 3 +versus 2, to provide their Python 3 semantics. + + +Standard library reorganization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``future`` supports the standard library reorganization (PEP 3108) through the +following Py3 interfaces: + + >>> # Top-level packages with Py3 names provided on Py2: + >>> import html.parser + >>> import queue + >>> import tkinter.dialog + >>> import xmlrpc.client + >>> # etc. 
+ + >>> # Aliases provided for extensions to existing Py2 module names: + >>> from future.standard_library import install_aliases + >>> install_aliases() + + >>> from collections import Counter, OrderedDict # backported to Py2.6 + >>> from collections import UserDict, UserList, UserString + >>> import urllib.request + >>> from itertools import filterfalse, zip_longest + >>> from subprocess import getoutput, getstatusoutput + + +Automatic conversion +-------------------- + +An included script called `futurize +`_ aids in converting +code (from either Python 2 or Python 3) to code compatible with both +platforms. It is similar to ``python-modernize`` but goes further in +providing Python 3 compatibility through the use of the backported types +and builtin functions in ``future``. + + +Documentation +------------- + +See: http://python-future.org + + +Credits +------- + +:Author: Ed Schofield, Jordan M. Adler, et al +:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte + Ltd, Singapore. http://pythoncharmers.com +:Others: See docs/credits.rst or http://python-future.org/credits.html + + +Licensing +--------- +Copyright 2013-2019 Python Charmers Pty Ltd, Australia. +The software is distributed under an MIT licence. See LICENSE.txt. + +""" + +__title__ = 'future' +__author__ = 'Ed Schofield' +__license__ = 'MIT' +__copyright__ = 'Copyright 2013-2019 Python Charmers Pty Ltd' +__ver_major__ = 0 +__ver_minor__ = 18 +__ver_patch__ = 2 +__ver_sub__ = '' +__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, + __ver_patch__, __ver_sub__) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/METADATA b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..1815b243bc8480c1df3c73a79e1dda1ff74ea2f0 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/METADATA @@ -0,0 +1,530 @@ +Metadata-Version: 2.1 +Name: psutil +Version: 5.9.8 +Summary: Cross-platform lib for process and system monitoring in Python. 
+Home-page: https://github.com/giampaolo/psutil +Author: Giampaolo Rodola +Author-email: g.rodola@gmail.com +License: BSD-3-Clause +Keywords: ps,top,kill,free,lsof,netstat,nice,tty,ionice,uptime,taskmgr,process,df,iotop,iostat,ifconfig,taskset,who,pidof,pmap,smem,pstree,monitoring,ulimit,prlimit,smem,performance,metrics,agent,observability +Platform: Platform Independent +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Environment :: Win32 (MS Windows) +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Information Technology +Classifier: Intended Audience :: System Administrators +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft :: Windows :: Windows 10 +Classifier: Operating System :: Microsoft :: Windows :: Windows 7 +Classifier: Operating System :: Microsoft :: Windows :: Windows 8 +Classifier: Operating System :: Microsoft :: Windows :: Windows 8.1 +Classifier: Operating System :: Microsoft :: Windows :: Windows Server 2003 +Classifier: Operating System :: Microsoft :: Windows :: Windows Server 2008 +Classifier: Operating System :: Microsoft :: Windows :: Windows Vista +Classifier: Operating System :: Microsoft +Classifier: Operating System :: OS Independent +Classifier: Operating System :: POSIX :: AIX +Classifier: Operating System :: POSIX :: BSD :: FreeBSD +Classifier: Operating System :: POSIX :: BSD :: NetBSD +Classifier: Operating System :: POSIX :: BSD :: OpenBSD +Classifier: Operating System :: POSIX :: BSD +Classifier: Operating System :: POSIX :: Linux +Classifier: Operating System :: POSIX :: SunOS/Solaris +Classifier: Operating System :: POSIX +Classifier: Programming Language :: C +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Programming Language :: Python +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: System :: Benchmark +Classifier: Topic :: System :: Hardware :: Hardware Drivers +Classifier: Topic :: System :: Hardware +Classifier: Topic :: System :: Monitoring +Classifier: Topic :: System :: Networking :: Monitoring :: Hardware Watchdog +Classifier: Topic :: System :: Networking :: Monitoring +Classifier: Topic :: System :: Networking +Classifier: Topic :: System :: Operating System +Classifier: Topic :: System :: Systems Administration +Classifier: Topic :: Utilities +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.* +Description-Content-Type: text/x-rst +License-File: LICENSE +Provides-Extra: test +Requires-Dist: ipaddress ; (python_version < "3.0") and extra == 'test' +Requires-Dist: mock ; (python_version < "3.0") and extra == 'test' +Requires-Dist: enum34 ; (python_version <= "3.4") and extra == 'test' +Requires-Dist: pywin32 ; (sys_platform == "win32") and extra == 'test' +Requires-Dist: wmi ; (sys_platform == "win32") and extra == 'test' + +| |downloads| |stars| |forks| |contributors| |coverage| +| |version| |py-versions| |packages| |license| +| |github-actions-wheels| |github-actions-bsd| |appveyor| |doc| |twitter| |tidelift| + +.. 
|downloads| image:: https://img.shields.io/pypi/dm/psutil.svg + :target: https://pepy.tech/project/psutil + :alt: Downloads + +.. |stars| image:: https://img.shields.io/github/stars/giampaolo/psutil.svg + :target: https://github.com/giampaolo/psutil/stargazers + :alt: Github stars + +.. |forks| image:: https://img.shields.io/github/forks/giampaolo/psutil.svg + :target: https://github.com/giampaolo/psutil/network/members + :alt: Github forks + +.. |contributors| image:: https://img.shields.io/github/contributors/giampaolo/psutil.svg + :target: https://github.com/giampaolo/psutil/graphs/contributors + :alt: Contributors + +.. |github-actions-wheels| image:: https://img.shields.io/github/actions/workflow/status/giampaolo/psutil/.github/workflows/build.yml?label=Linux%2C%20macOS%2C%20Windows + :target: https://github.com/giampaolo/psutil/actions?query=workflow%3Abuild + :alt: Linux, macOS, Windows + +.. |github-actions-bsd| image:: https://img.shields.io/github/actions/workflow/status/giampaolo/psutil/.github/workflows/bsd.yml?label=FreeBSD,%20NetBSD,%20OpenBSD + :target: https://github.com/giampaolo/psutil/actions?query=workflow%3Absd-tests + :alt: FreeBSD, NetBSD, OpenBSD + +.. |appveyor| image:: https://img.shields.io/appveyor/build/giampaolo/psutil/master.svg?maxAge=3600&label=Windows%20(py2) + :target: https://ci.appveyor.com/project/giampaolo/psutil + :alt: Windows (Appveyor) + +.. |coverage| image:: https://coveralls.io/repos/github/giampaolo/psutil/badge.svg?branch=master + :target: https://coveralls.io/github/giampaolo/psutil?branch=master + :alt: Test coverage (coverall.io) + +.. |doc| image:: https://readthedocs.org/projects/psutil/badge/?version=latest + :target: https://psutil.readthedocs.io/en/latest/ + :alt: Documentation Status + +.. |version| image:: https://img.shields.io/pypi/v/psutil.svg?label=pypi + :target: https://pypi.org/project/psutil + :alt: Latest version + +.. |py-versions| image:: https://img.shields.io/pypi/pyversions/psutil.svg + :alt: Supported Python versions + +.. |packages| image:: https://repology.org/badge/tiny-repos/python:psutil.svg + :target: https://repology.org/metapackage/python:psutil/versions + :alt: Binary packages + +.. |license| image:: https://img.shields.io/pypi/l/psutil.svg + :target: https://github.com/giampaolo/psutil/blob/master/LICENSE + :alt: License + +.. |twitter| image:: https://img.shields.io/twitter/follow/grodola.svg?label=follow&style=flat&logo=twitter&logoColor=4FADFF + :target: https://twitter.com/grodola + :alt: Twitter Follow + +.. |tidelift| image:: https://tidelift.com/badges/github/giampaolo/psutil?style=flat + :target: https://tidelift.com/subscription/pkg/pypi-psutil?utm_source=pypi-psutil&utm_medium=referral&utm_campaign=readme + :alt: Tidelift + +----- + +Quick links +=========== + +- `Home page `_ +- `Install `_ +- `Documentation `_ +- `Download `_ +- `Forum `_ +- `StackOverflow `_ +- `Blog `_ +- `What's new `_ + + +Summary +======= + +psutil (process and system utilities) is a cross-platform library for +retrieving information on **running processes** and **system utilization** +(CPU, memory, disks, network, sensors) in Python. +It is useful mainly for **system monitoring**, **profiling and limiting process +resources** and **management of running processes**. +It implements many functionalities offered by classic UNIX command line tools +such as *ps, top, iotop, lsof, netstat, ifconfig, free* and others. 
+psutil currently supports the following platforms: + +- **Linux** +- **Windows** +- **macOS** +- **FreeBSD, OpenBSD**, **NetBSD** +- **Sun Solaris** +- **AIX** + +Supported Python versions are **2.7**, **3.6+** and +`PyPy `__. + +Funding +======= + +While psutil is free software and will always be, the project would benefit +immensely from some funding. +Keeping up with bug reports and maintenance has become hardly sustainable for +me alone in terms of time. +If you're a company that's making significant use of psutil you can consider +becoming a sponsor via `GitHub Sponsors `__, +`Open Collective `__ or +`PayPal `__ +and have your logo displayed in here and psutil `doc `__. + +Sponsors +======== + +.. image:: https://github.com/giampaolo/psutil/raw/master/docs/_static/tidelift-logo.png + :width: 200 + :alt: Alternative text + +`Add your logo `__. + +Example usages +============== + +This represents pretty much the whole psutil API. + +CPU +--- + +.. code-block:: python + + >>> import psutil + >>> + >>> psutil.cpu_times() + scputimes(user=3961.46, nice=169.729, system=2150.659, idle=16900.540, iowait=629.59, irq=0.0, softirq=19.42, steal=0.0, guest=0, guest_nice=0.0) + >>> + >>> for x in range(3): + ... psutil.cpu_percent(interval=1) + ... + 4.0 + 5.9 + 3.8 + >>> + >>> for x in range(3): + ... psutil.cpu_percent(interval=1, percpu=True) + ... + [4.0, 6.9, 3.7, 9.2] + [7.0, 8.5, 2.4, 2.1] + [1.2, 9.0, 9.9, 7.2] + >>> + >>> for x in range(3): + ... psutil.cpu_times_percent(interval=1, percpu=False) + ... + scputimes(user=1.5, nice=0.0, system=0.5, idle=96.5, iowait=1.5, irq=0.0, softirq=0.0, steal=0.0, guest=0.0, guest_nice=0.0) + scputimes(user=1.0, nice=0.0, system=0.0, idle=99.0, iowait=0.0, irq=0.0, softirq=0.0, steal=0.0, guest=0.0, guest_nice=0.0) + scputimes(user=2.0, nice=0.0, system=0.0, idle=98.0, iowait=0.0, irq=0.0, softirq=0.0, steal=0.0, guest=0.0, guest_nice=0.0) + >>> + >>> psutil.cpu_count() + 4 + >>> psutil.cpu_count(logical=False) + 2 + >>> + >>> psutil.cpu_stats() + scpustats(ctx_switches=20455687, interrupts=6598984, soft_interrupts=2134212, syscalls=0) + >>> + >>> psutil.cpu_freq() + scpufreq(current=931.42925, min=800.0, max=3500.0) + >>> + >>> psutil.getloadavg() # also on Windows (emulated) + (3.14, 3.89, 4.67) + +Memory +------ + +.. code-block:: python + + >>> psutil.virtual_memory() + svmem(total=10367352832, available=6472179712, percent=37.6, used=8186245120, free=2181107712, active=4748992512, inactive=2758115328, buffers=790724608, cached=3500347392, shared=787554304) + >>> psutil.swap_memory() + sswap(total=2097147904, used=296128512, free=1801019392, percent=14.1, sin=304193536, sout=677842944) + >>> + +Disks +----- + +.. code-block:: python + + >>> psutil.disk_partitions() + [sdiskpart(device='/dev/sda1', mountpoint='/', fstype='ext4', opts='rw,nosuid', maxfile=255, maxpath=4096), + sdiskpart(device='/dev/sda2', mountpoint='/home', fstype='ext', opts='rw', maxfile=255, maxpath=4096)] + >>> + >>> psutil.disk_usage('/') + sdiskusage(total=21378641920, used=4809781248, free=15482871808, percent=22.5) + >>> + >>> psutil.disk_io_counters(perdisk=False) + sdiskio(read_count=719566, write_count=1082197, read_bytes=18626220032, write_bytes=24081764352, read_time=5023392, write_time=63199568, read_merged_count=619166, write_merged_count=812396, busy_time=4523412) + >>> + +Network +------- + +.. 
code-block:: python + + >>> psutil.net_io_counters(pernic=True) + {'eth0': netio(bytes_sent=485291293, bytes_recv=6004858642, packets_sent=3251564, packets_recv=4787798, errin=0, errout=0, dropin=0, dropout=0), + 'lo': netio(bytes_sent=2838627, bytes_recv=2838627, packets_sent=30567, packets_recv=30567, errin=0, errout=0, dropin=0, dropout=0)} + >>> + >>> psutil.net_connections(kind='tcp') + [sconn(fd=115, family=, type=, laddr=addr(ip='10.0.0.1', port=48776), raddr=addr(ip='93.186.135.91', port=80), status='ESTABLISHED', pid=1254), + sconn(fd=117, family=, type=, laddr=addr(ip='10.0.0.1', port=43761), raddr=addr(ip='72.14.234.100', port=80), status='CLOSING', pid=2987), + ...] + >>> + >>> psutil.net_if_addrs() + {'lo': [snicaddr(family=, address='127.0.0.1', netmask='255.0.0.0', broadcast='127.0.0.1', ptp=None), + snicaddr(family=, address='::1', netmask='ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', broadcast=None, ptp=None), + snicaddr(family=, address='00:00:00:00:00:00', netmask=None, broadcast='00:00:00:00:00:00', ptp=None)], + 'wlan0': [snicaddr(family=, address='192.168.1.3', netmask='255.255.255.0', broadcast='192.168.1.255', ptp=None), + snicaddr(family=, address='fe80::c685:8ff:fe45:641%wlan0', netmask='ffff:ffff:ffff:ffff::', broadcast=None, ptp=None), + snicaddr(family=, address='c4:85:08:45:06:41', netmask=None, broadcast='ff:ff:ff:ff:ff:ff', ptp=None)]} + >>> + >>> psutil.net_if_stats() + {'lo': snicstats(isup=True, duplex=, speed=0, mtu=65536, flags='up,loopback,running'), + 'wlan0': snicstats(isup=True, duplex=, speed=100, mtu=1500, flags='up,broadcast,running,multicast')} + >>> + +Sensors +------- + +.. code-block:: python + + >>> import psutil + >>> psutil.sensors_temperatures() + {'acpitz': [shwtemp(label='', current=47.0, high=103.0, critical=103.0)], + 'asus': [shwtemp(label='', current=47.0, high=None, critical=None)], + 'coretemp': [shwtemp(label='Physical id 0', current=52.0, high=100.0, critical=100.0), + shwtemp(label='Core 0', current=45.0, high=100.0, critical=100.0)]} + >>> + >>> psutil.sensors_fans() + {'asus': [sfan(label='cpu_fan', current=3200)]} + >>> + >>> psutil.sensors_battery() + sbattery(percent=93, secsleft=16628, power_plugged=False) + >>> + +Other system info +----------------- + +.. code-block:: python + + >>> import psutil + >>> psutil.users() + [suser(name='giampaolo', terminal='pts/2', host='localhost', started=1340737536.0, pid=1352), + suser(name='giampaolo', terminal='pts/3', host='localhost', started=1340737792.0, pid=1788)] + >>> + >>> psutil.boot_time() + 1365519115.0 + >>> + +Process management +------------------ + +.. 
code-block:: python + + >>> import psutil + >>> psutil.pids() + [1, 2, 3, 4, 5, 6, 7, 46, 48, 50, 51, 178, 182, 222, 223, 224, 268, 1215, + 1216, 1220, 1221, 1243, 1244, 1301, 1601, 2237, 2355, 2637, 2774, 3932, + 4176, 4177, 4185, 4187, 4189, 4225, 4243, 4245, 4263, 4282, 4306, 4311, + 4312, 4313, 4314, 4337, 4339, 4357, 4358, 4363, 4383, 4395, 4408, 4433, + 4443, 4445, 4446, 5167, 5234, 5235, 5252, 5318, 5424, 5644, 6987, 7054, + 7055, 7071] + >>> + >>> p = psutil.Process(7055) + >>> p + psutil.Process(pid=7055, name='python3', status='running', started='09:04:44') + >>> p.pid + 7055 + >>> p.name() + 'python3' + >>> p.exe() + '/usr/bin/python3' + >>> p.cwd() + '/home/giampaolo' + >>> p.cmdline() + ['/usr/bin/python3', 'main.py'] + >>> + >>> p.ppid() + 7054 + >>> p.parent() + psutil.Process(pid=4699, name='bash', status='sleeping', started='09:06:44') + >>> p.parents() + [psutil.Process(pid=4699, name='bash', started='09:06:44'), + psutil.Process(pid=4689, name='gnome-terminal-server', status='sleeping', started='0:06:44'), + psutil.Process(pid=1, name='systemd', status='sleeping', started='05:56:55')] + >>> p.children(recursive=True) + [psutil.Process(pid=29835, name='python3', status='sleeping', started='11:45:38'), + psutil.Process(pid=29836, name='python3', status='waking', started='11:43:39')] + >>> + >>> p.status() + 'running' + >>> p.create_time() + 1267551141.5019531 + >>> p.terminal() + '/dev/pts/0' + >>> + >>> p.username() + 'giampaolo' + >>> p.uids() + puids(real=1000, effective=1000, saved=1000) + >>> p.gids() + pgids(real=1000, effective=1000, saved=1000) + >>> + >>> p.cpu_times() + pcputimes(user=1.02, system=0.31, children_user=0.32, children_system=0.1, iowait=0.0) + >>> p.cpu_percent(interval=1.0) + 12.1 + >>> p.cpu_affinity() + [0, 1, 2, 3] + >>> p.cpu_affinity([0, 1]) # set + >>> p.cpu_num() + 1 + >>> + >>> p.memory_info() + pmem(rss=10915840, vms=67608576, shared=3313664, text=2310144, lib=0, data=7262208, dirty=0) + >>> p.memory_full_info() # "real" USS memory usage (Linux, macOS, Win only) + pfullmem(rss=10199040, vms=52133888, shared=3887104, text=2867200, lib=0, data=5967872, dirty=0, uss=6545408, pss=6872064, swap=0) + >>> p.memory_percent() + 0.7823 + >>> p.memory_maps() + [pmmap_grouped(path='/lib/x8664-linux-gnu/libutil-2.15.so', rss=32768, size=2125824, pss=32768, shared_clean=0, shared_dirty=0, private_clean=20480, private_dirty=12288, referenced=32768, anonymous=12288, swap=0), + pmmap_grouped(path='/lib/x8664-linux-gnu/libc-2.15.so', rss=3821568, size=3842048, pss=3821568, shared_clean=0, shared_dirty=0, private_clean=0, private_dirty=3821568, referenced=3575808, anonymous=3821568, swap=0), + pmmap_grouped(path='[heap]', rss=32768, size=139264, pss=32768, shared_clean=0, shared_dirty=0, private_clean=0, private_dirty=32768, referenced=32768, anonymous=32768, swap=0), + pmmap_grouped(path='[stack]', rss=2465792, size=2494464, pss=2465792, shared_clean=0, shared_dirty=0, private_clean=0, private_dirty=2465792, referenced=2277376, anonymous=2465792, swap=0), + ...] 
+ >>> + >>> p.io_counters() + pio(read_count=478001, write_count=59371, read_bytes=700416, write_bytes=69632, read_chars=456232, write_chars=517543) + >>> + >>> p.open_files() + [popenfile(path='/home/giampaolo/monit.py', fd=3, position=0, mode='r', flags=32768), + popenfile(path='/var/log/monit.log', fd=4, position=235542, mode='a', flags=33793)] + >>> + >>> p.connections(kind='tcp') + [pconn(fd=115, family=, type=, laddr=addr(ip='10.0.0.1', port=48776), raddr=addr(ip='93.186.135.91', port=80), status='ESTABLISHED'), + pconn(fd=117, family=, type=, laddr=addr(ip='10.0.0.1', port=43761), raddr=addr(ip='72.14.234.100', port=80), status='CLOSING')] + >>> + >>> p.threads() + [pthread(id=5234, user_time=22.5, system_time=9.2891), + pthread(id=5237, user_time=0.0707, system_time=1.1)] + >>> + >>> p.num_threads() + 4 + >>> p.num_fds() + 8 + >>> p.num_ctx_switches() + pctxsw(voluntary=78, involuntary=19) + >>> + >>> p.nice() + 0 + >>> p.nice(10) # set + >>> + >>> p.ionice(psutil.IOPRIO_CLASS_IDLE) # IO priority (Win and Linux only) + >>> p.ionice() + pionice(ioclass=, value=0) + >>> + >>> p.rlimit(psutil.RLIMIT_NOFILE, (5, 5)) # set resource limits (Linux only) + >>> p.rlimit(psutil.RLIMIT_NOFILE) + (5, 5) + >>> + >>> p.environ() + {'LC_PAPER': 'it_IT.UTF-8', 'SHELL': '/bin/bash', 'GREP_OPTIONS': '--color=auto', + 'XDG_CONFIG_DIRS': '/etc/xdg/xdg-ubuntu:/usr/share/upstart/xdg:/etc/xdg', + ...} + >>> + >>> p.as_dict() + {'status': 'running', 'num_ctx_switches': pctxsw(voluntary=63, involuntary=1), 'pid': 5457, ...} + >>> p.is_running() + True + >>> p.suspend() + >>> p.resume() + >>> + >>> p.terminate() + >>> p.kill() + >>> p.wait(timeout=3) + + >>> + >>> psutil.test() + USER PID %CPU %MEM VSZ RSS TTY START TIME COMMAND + root 1 0.0 0.0 24584 2240 Jun17 00:00 init + root 2 0.0 0.0 0 0 Jun17 00:00 kthreadd + ... + giampaolo 31475 0.0 0.0 20760 3024 /dev/pts/0 Jun19 00:00 python2.4 + giampaolo 31721 0.0 2.2 773060 181896 00:04 10:30 chrome + root 31763 0.0 0.0 0 0 00:05 00:00 kworker/0:1 + >>> + +Further process APIs +-------------------- + +.. code-block:: python + + >>> import psutil + >>> for proc in psutil.process_iter(['pid', 'name']): + ... print(proc.info) + ... + {'pid': 1, 'name': 'systemd'} + {'pid': 2, 'name': 'kthreadd'} + {'pid': 3, 'name': 'ksoftirqd/0'} + ... + >>> + >>> psutil.pid_exists(3) + True + >>> + >>> def on_terminate(proc): + ... print("process {} terminated".format(proc)) + ... + >>> # waits for multiple processes to terminate + >>> gone, alive = psutil.wait_procs(procs_list, timeout=3, callback=on_terminate) + >>> + +Windows services +---------------- + +.. code-block:: python + + >>> list(psutil.win_service_iter()) + [, + , + , + , + ...] 
+ >>> s = psutil.win_service_get('alg') + >>> s.as_dict() + {'binpath': 'C:\\Windows\\System32\\alg.exe', + 'description': 'Provides support for 3rd party protocol plug-ins for Internet Connection Sharing', + 'display_name': 'Application Layer Gateway Service', + 'name': 'alg', + 'pid': None, + 'start_type': 'manual', + 'status': 'stopped', + 'username': 'NT AUTHORITY\\LocalService'} + +Projects using psutil +===================== + +Here's some I find particularly interesting: + +- https://github.com/google/grr +- https://github.com/facebook/osquery/ +- https://github.com/nicolargo/glances +- https://github.com/aristocratos/bpytop +- https://github.com/Jahaja/psdash +- https://github.com/ajenti/ajenti +- https://github.com/home-assistant/home-assistant/ + +Portings +======== + +- Go: https://github.com/shirou/gopsutil +- C: https://github.com/hamon-in/cpslib +- Rust: https://github.com/rust-psutil/rust-psutil +- Nim: https://github.com/johnscillieri/psutil-nim + + + diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/RECORD b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..beaf627b86b61973b6480fb1353ee14b1571efab --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/RECORD @@ -0,0 +1,66 @@ +psutil-5.9.8.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +psutil-5.9.8.dist-info/LICENSE,sha256=uJwGOzeG4o4MCjjxkx22H-015p3SopZvvs_-4PRsjRA,1548 +psutil-5.9.8.dist-info/METADATA,sha256=GfZ5-fidrq0yXOCgyN4funClVHk6R_zfJz-3FKx0KjU,21837 +psutil-5.9.8.dist-info/RECORD,, +psutil-5.9.8.dist-info/WHEEL,sha256=rgpVBmjjvbINeGKCkWEGd3f40VHMTsDkQj1Lgil82zE,221 +psutil-5.9.8.dist-info/top_level.txt,sha256=gCNhn57wzksDjSAISmgMJ0aiXzQulk0GJhb2-BAyYgw,7 +psutil/__init__.py,sha256=YWf_i2ZjuJqELRfNl6nX0nZuoi09GXqMZPJVsMJMCQQ,89169 +psutil/__pycache__/__init__.cpython-38.pyc,, +psutil/__pycache__/_common.cpython-38.pyc,, +psutil/__pycache__/_compat.cpython-38.pyc,, +psutil/__pycache__/_psaix.cpython-38.pyc,, +psutil/__pycache__/_psbsd.cpython-38.pyc,, +psutil/__pycache__/_pslinux.cpython-38.pyc,, +psutil/__pycache__/_psosx.cpython-38.pyc,, +psutil/__pycache__/_psposix.cpython-38.pyc,, +psutil/__pycache__/_pssunos.cpython-38.pyc,, +psutil/__pycache__/_pswindows.cpython-38.pyc,, +psutil/_common.py,sha256=BTwHxdYJQynrn5i8IOs6XFxLo9L1Eg5cgDCq6Yaypr0,29393 +psutil/_compat.py,sha256=AOF0vSCWle_sbJ1Gw-CGx0aEI9yk5u70YhPYOPg3KHs,15349 +psutil/_psaix.py,sha256=1bqEwjk6IG3Y-zrDajKi8oPSYvq3NrqpPUQeDRo4Ugg,18749 +psutil/_psbsd.py,sha256=FruAJy_GrpjAfHLpb4c3IVGfy2Xii8b1BHnIjxUfbbI,31956 +psutil/_pslinux.py,sha256=QYI6yHTvRMYZlVxB068xVAEMEAGGRSU9E7-sJD3165o,88043 +psutil/_psosx.py,sha256=d_KMSzmjL6vAYQx1fQN57b3xazJGdTt4rIysODB1r2g,16209 +psutil/_psposix.py,sha256=X9rd7WHKQ6mUAn2ihb03MCnzrBtQsrPRkCouExmuagQ,8235 +psutil/_pssunos.py,sha256=Zx6eLY-0NRUFFIKP7SycktgDoottdnlA9aX8y4e74dY,25559 +psutil/_psutil_linux.abi3.so,sha256=onwm8BWn6axbKjPqB263JHm6rbDcEvMncKYhhpbu5I4,115304 +psutil/_psutil_posix.abi3.so,sha256=xNVKy1LdKcnigEe_BpaXfYEg4qad9MkedLwkYh4BaBk,71624 +psutil/_pswindows.py,sha256=BXgoASpIS6ccw5jTx4V-H2fMsIcSv_NQ6EozsxFgY-0,37734 +psutil/tests/__init__.py,sha256=gc621Vvgj2NaxusB_zGwfqwP_np3qaCQqJlyPrp1D0k,64753 +psutil/tests/__main__.py,sha256=f1YY6SZebctd5Hwb1in40nFShfJw4zA2FLidsdg_eY8,269 +psutil/tests/__pycache__/__init__.cpython-38.pyc,, 
+psutil/tests/__pycache__/__main__.cpython-38.pyc,, +psutil/tests/__pycache__/runner.cpython-38.pyc,, +psutil/tests/__pycache__/test_aix.cpython-38.pyc,, +psutil/tests/__pycache__/test_bsd.cpython-38.pyc,, +psutil/tests/__pycache__/test_connections.cpython-38.pyc,, +psutil/tests/__pycache__/test_contracts.cpython-38.pyc,, +psutil/tests/__pycache__/test_linux.cpython-38.pyc,, +psutil/tests/__pycache__/test_memleaks.cpython-38.pyc,, +psutil/tests/__pycache__/test_misc.cpython-38.pyc,, +psutil/tests/__pycache__/test_osx.cpython-38.pyc,, +psutil/tests/__pycache__/test_posix.cpython-38.pyc,, +psutil/tests/__pycache__/test_process.cpython-38.pyc,, +psutil/tests/__pycache__/test_process_all.cpython-38.pyc,, +psutil/tests/__pycache__/test_sunos.cpython-38.pyc,, +psutil/tests/__pycache__/test_system.cpython-38.pyc,, +psutil/tests/__pycache__/test_testutils.cpython-38.pyc,, +psutil/tests/__pycache__/test_unicode.cpython-38.pyc,, +psutil/tests/__pycache__/test_windows.cpython-38.pyc,, +psutil/tests/runner.py,sha256=WtRnLZ5gS39gIysLCkeV99hw5LvodVwBGesoen9IMNs,11464 +psutil/tests/test_aix.py,sha256=8SKjFw7cR3byBShlvWAzQSOTjji5Bpnk8JyUksR0AQI,4585 +psutil/tests/test_bsd.py,sha256=kfNXLsZ1p-VoGtVX4At9qMOS_zN8OMVP9yecuHEWaC4,21245 +psutil/tests/test_connections.py,sha256=CHL65q2IYxb8ErtJAUh87RyKSCWewwmAg4VFK80arO8,21642 +psutil/tests/test_contracts.py,sha256=_TAWN7ldbgqJIdh6tMkRDivXPoH6T3jKYG47_KzDvtE,12998 +psutil/tests/test_linux.py,sha256=LPvhEPUPSQ4MdxBlARPcR4Thsd_wNqKkqj0JqlKkZCo,92530 +psutil/tests/test_memleaks.py,sha256=pzwEMUaz6Xh8AmN_qiCmYTU2yzot_dwsdbTfFdK89Vk,15012 +psutil/tests/test_misc.py,sha256=JovYMJNShu2yC3t2sfkFY5uW96TL7K5gkwXnjOR25aQ,35117 +psutil/tests/test_osx.py,sha256=MQsepO25TlfydaOxjqgEBM4Wri9SnCUkpBLEiQnKv2Q,6603 +psutil/tests/test_posix.py,sha256=sDam9vdJJWuYNddovhxi1c3K-z4AfTAD0fQyyaeD8YY,17387 +psutil/tests/test_process.py,sha256=lOq3nOyQQvKRJRaTREO0GB0ErOQGZeqxHsEbn5qs8P4,61535 +psutil/tests/test_process_all.py,sha256=qkLXnkVVYEqM2IhhID0CzEVWAmD4Ib7i0E4mUjo8J_Y,16112 +psutil/tests/test_sunos.py,sha256=NhZsHABJKjCEDzGA7ZL62s6NlqpJwpoDybaPB2Bm9HE,1310 +psutil/tests/test_system.py,sha256=rfzGP_ZuMHxzuTz3CuVxn8-zXNe5_9YCm1Z5JTKXqT8,37044 +psutil/tests/test_testutils.py,sha256=mBbsRBbUf4VmDrKTbvlSeVqo_5HTM9h4eTE_VZ_5eXc,14828 +psutil/tests/test_unicode.py,sha256=lKytzxNA72Zdhhz6jnXjVRm-3-79j1zeliPO1SMSVaE,12549 +psutil/tests/test_windows.py,sha256=D_fKdhp8rsXDF7OrBXLRPS7XMpqXAzqJzbrjPqlL5pE,35298 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/top_level.txt b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..a4d92cc08db6a0d8bfedbbbd620d1fb11f84677b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/psutil-5.9.8.dist-info/top_level.txt @@ -0,0 +1 @@ +psutil diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/INSTALLER b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git 
a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/LICENSE b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..de09f408cec1f6d08308ea79fcff9e156468325c --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2014 Kenneth Reitz. + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/METADATA b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..975ce567fcd85dbf86a7ecdfa1d91cd77010f526 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/METADATA @@ -0,0 +1,245 @@ +Metadata-Version: 2.1 +Name: requests-oauthlib +Version: 1.3.1 +Summary: OAuthlib authentication support for Requests. +Home-page: https://github.com/requests/requests-oauthlib +Author: Kenneth Reitz +Author-email: me@kennethreitz.com +License: ISC +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Natural Language :: English +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* +Description-Content-Type: text/x-rst +License-File: LICENSE +Requires-Dist: oauthlib (>=3.0.0) +Requires-Dist: requests (>=2.0.0) +Provides-Extra: rsa +Requires-Dist: oauthlib[signedtoken] (>=3.0.0) ; extra == 'rsa' + +Requests-OAuthlib |build-status| |coverage-status| |docs| +========================================================= + +This project provides first-class OAuth library support for `Requests `_. + +The OAuth 1 workflow +-------------------- + +OAuth 1 can seem overly complicated and it sure has its quirks. Luckily, +requests_oauthlib hides most of these and let you focus at the task at hand. 
+ +Accessing protected resources using requests_oauthlib is as simple as: + +.. code-block:: pycon + + >>> from requests_oauthlib import OAuth1Session + >>> twitter = OAuth1Session('client_key', + client_secret='client_secret', + resource_owner_key='resource_owner_key', + resource_owner_secret='resource_owner_secret') + >>> url = 'https://api.twitter.com/1/account/settings.json' + >>> r = twitter.get(url) + +Before accessing resources you will need to obtain a few credentials from your +provider (e.g. Twitter) and authorization from the user for whom you wish to +retrieve resources for. You can read all about this in the full +`OAuth 1 workflow guide on RTD `_. + +The OAuth 2 workflow +-------------------- + +OAuth 2 is generally simpler than OAuth 1 but comes in more flavours. The most +common being the Authorization Code Grant, also known as the WebApplication +flow. + +Fetching a protected resource after obtaining an access token can be extremely +simple. However, before accessing resources you will need to obtain a few +credentials from your provider (e.g. Google) and authorization from the user +for whom you wish to retrieve resources for. You can read all about this in the +full `OAuth 2 workflow guide on RTD `_. + +Installation +------------- + +To install requests and requests_oauthlib you can use pip: + +.. code-block:: bash + + $ pip install requests requests_oauthlib + +.. |build-status| image:: https://github.com/requests/requests-oauthlib/actions/workflows/run-tests.yml/badge.svg + :target: https://github.com/requests/requests-oauthlib/actions +.. |coverage-status| image:: https://img.shields.io/coveralls/requests/requests-oauthlib.svg + :target: https://coveralls.io/r/requests/requests-oauthlib +.. |docs| image:: https://readthedocs.org/projects/requests-oauthlib/badge/ + :alt: Documentation Status + :scale: 100% + :target: https://requests-oauthlib.readthedocs.io/ + + +History +------- + +v1.3.1 (21 January 2022) +++++++++++++++++++++++++ + +- Add initial support for OAuth Mutual TLS (draft-ietf-oauth-mtls) +- Add eBay compliance fix +- Add Spotify OAuth 2 Tutorial +- Add support for python 3.8, 3.9 +- Fixed LinkedIn Compliance Fixes +- Fixed ReadTheDocs Documentation and sphinx errors +- Moved pipeline to GitHub Actions + +v1.3.0 (6 November 2019) +++++++++++++++++++++++++ + +- Instagram compliance fix +- Added ``force_querystring`` argument to fetch_token() method on OAuth2Session + +v1.2.0 (14 January 2019) +++++++++++++++++++++++++ + +- This project now depends on OAuthlib 3.0.0 and above. It does **not** support + versions of OAuthlib before 3.0.0. +- Updated oauth2 tests to use 'sess' for an OAuth2Session instance instead of `auth` + because OAuth2Session objects and methods acceept an `auth` paramether which is + typically an instance of `requests.auth.HTTPBasicAuth` +- `OAuth2Session.fetch_token` previously tried to guess how and where to provide + "client" and "user" credentials incorrectly. This was incompatible with some + OAuth servers and incompatible with breaking changes in oauthlib that seek to + correctly provide the `client_id`. The older implementation also did not raise + the correct exceptions when username and password are not present on Legacy + clients. +- Avoid automatic netrc authentication for OAuth2Session. + +v1.1.0 (9 January 2019) ++++++++++++++++++++++++ + +- Adjusted version specifier for ``oauthlib`` dependency: this project is + not yet compatible with ``oauthlib`` 3.0.0. +- Dropped dependency on ``nose``. 
+- Minor changes to clean up the code and make it more readable/maintainable.
+
+v1.0.0 (4 June 2018)
+++++++++++++++++++++
+
+- **Removed support for Python 2.6 and Python 3.3.**
+  This project now supports Python 2.7, and Python 3.4 and above.
+- Added several examples to the documentation.
+- Added plentymarkets compliance fix.
+- Added a ``token`` property to OAuth1Session, to match the corresponding
+  ``token`` property on OAuth2Session.
+
+v0.8.0 (14 February 2017)
++++++++++++++++++++++++++
+
+- Added Fitbit compliance fix.
+- Fixed an issue where newlines in the response body for the access token
+  request would cause errors when trying to extract the token.
+- Fixed an issue introduced in v0.7.0 where users passing ``auth`` to several
+  methods would encounter conflicts with the ``client_id`` and
+  ``client_secret``-derived auth. The user-supplied ``auth`` argument is now
+  used in preference to those options.
+
+v0.7.0 (22 September 2016)
+++++++++++++++++++++++++++
+
+- Allowed ``OAuth2Session.request`` to take the ``client_id`` and
+  ``client_secret`` parameters for the purposes of automatic token refresh,
+  which may need them.
+
+v0.6.2 (12 July 2016)
++++++++++++++++++++++
+
+- Use ``client_id`` and ``client_secret`` for the Authorization header if
+  provided.
+- Allow explicit bypass of the Authorization header by setting ``auth=False``.
+- Pass through the ``proxies`` kwarg when refreshing tokens.
+- Miscellaneous cleanups.
+
+v0.6.1 (19 February 2016)
++++++++++++++++++++++++++
+
+- Fixed a bug when sending authorization in headers with no username and
+  password present.
+- Make sure we clear the session token before obtaining a new one.
+- Some improvements to the Slack compliance fix.
+- Avoid timing problems around token refresh.
+- Allow passing arbitrary arguments to requests when calling
+  ``fetch_request_token`` and ``fetch_access_token``.
+
+v0.6.0 (14 December 2015)
++++++++++++++++++++++++++
+
+- Add compliance fix for Slack.
+- Add compliance fix for Mailchimp.
+- ``TokenRequestDenied`` exceptions now carry the entire response, not just the
+  status code.
+- Pass through keyword arguments when refreshing tokens automatically.
+- Send authorization in headers, not just body, to maximize compatibility.
+- More getters/setters available for OAuth2 session client values.
+- Allow sending custom headers when refreshing tokens, and set some defaults.
+
+
+v0.5.0 (4 May 2015)
++++++++++++++++++++
+- Fix ``TypeError`` being raised instead of ``TokenMissing`` error.
+- Raise requests exceptions on 4XX and 5XX responses in the OAuth2 flow.
+- Avoid ``AttributeError`` when initializing the ``OAuth2Session`` class
+  without complete client information.
+
+v0.4.2 (16 October 2014)
+++++++++++++++++++++++++
+- New ``authorized`` property on OAuth1Session and OAuth2Session, which allows
+  you to easily determine if the session is already authorized with OAuth tokens
+  or not.
+- New ``TokenMissing`` and ``VerifierMissing`` exception classes for OAuth1Session:
+  this will make it easier to catch and identify these exceptions.
+
+v0.4.1 (6 June 2014)
+++++++++++++++++++++
+- New install target ``[rsa]`` for people using OAuth1 RSA-SHA1 signature
+  method.
+- Fixed bug in OAuth2 where supplied state param was not used in auth url.
+- OAuth2 HTTPS checking can be disabled by setting environment variable
+  ``OAUTHLIB_INSECURE_TRANSPORT``.
+- OAuth1 now re-authorizes upon redirects.
+- OAuth1 token fetching now raises a detailed error message when the
+  response body is incorrectly encoded or the request was denied.
+- Added support for custom OAuth1 clients.
+- OAuth2 compliance fix for Sina Weibo.
+- Multiple fixes to the Facebook compliance fix.
+- Compliance fixes now re-encode body properly as bytes in Python 3.
+- Logging now properly done under ``requests_oauthlib`` namespace instead
+  of piggybacking on oauthlib namespace.
+- Logging introduced for OAuth1 auth and session.
+
+v0.4.0 (29 September 2013)
+++++++++++++++++++++++++++
+- OAuth1Session methods only return unicode strings. #55.
+- Renamed requests_oauthlib.core to requests_oauthlib.oauth1_auth for consistency. #79.
+- Added Facebook compliance fix and access_token_response hook to OAuth2Session. #63.
+- Added LinkedIn compliance fix.
+- Added refresh_token_response compliance hook, invoked before parsing the refresh token.
+- Correctly limit compliance hooks to running only once!
+- Content type guessing should only be done when no content type is given.
+- OAuth1 now updates r.headers instead of replacing it with a non-case-insensitive dict.
+- Remove last use of Response.content (in OAuth1Session). #44.
+- State param can now be supplied in OAuth2Session.authorize_url
+
+
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/RECORD b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/RECORD
new file mode 100644
index 0000000000000000000000000000000000000000..1ca1bff99cff1dbce926a40741238a5eef960f23
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/RECORD
@@ -0,0 +1,37 @@
+requests_oauthlib-1.3.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+requests_oauthlib-1.3.1.dist-info/LICENSE,sha256=rgGEavrYqCkf5qCJZvMBWvmo_2ddhLmB-Xk8Ei94dug,745
+requests_oauthlib-1.3.1.dist-info/METADATA,sha256=xlgyoPqiXskknzuvbDx1-CiMjbjCpJ2OkxM_uQgQ6lo,10105
+requests_oauthlib-1.3.1.dist-info/RECORD,,
+requests_oauthlib-1.3.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+requests_oauthlib-1.3.1.dist-info/WHEEL,sha256=z9j0xAa_JmUKMpmz72K0ZGALSM_n-wQVmGbleXx2VHg,110
+requests_oauthlib-1.3.1.dist-info/top_level.txt,sha256=vx1R42DWBO64h6iu8Gco0F01TVTPWXSRWBaV1GwVRTQ,18
+requests_oauthlib/__init__.py,sha256=j3FXxxzppgvMvHj2v9csJtfEXt5QWeUODk6TmKTtopE,529
+requests_oauthlib/__pycache__/__init__.cpython-38.pyc,,
+requests_oauthlib/__pycache__/oauth1_auth.cpython-38.pyc,,
+requests_oauthlib/__pycache__/oauth1_session.cpython-38.pyc,,
+requests_oauthlib/__pycache__/oauth2_auth.cpython-38.pyc,,
+requests_oauthlib/__pycache__/oauth2_session.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__init__.py,sha256=M0zZXYmddhdiVVtazEC23tmPYP4MDaeBabKh9FiAyGc,398
+requests_oauthlib/compliance_fixes/__pycache__/__init__.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__pycache__/douban.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__pycache__/ebay.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__pycache__/facebook.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__pycache__/fitbit.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__pycache__/instagram.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__pycache__/mailchimp.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__pycache__/plentymarkets.cpython-38.pyc,,
+requests_oauthlib/compliance_fixes/__pycache__/slack.cpython-38.pyc,, +requests_oauthlib/compliance_fixes/__pycache__/weibo.cpython-38.pyc,, +requests_oauthlib/compliance_fixes/douban.py,sha256=Ilfc1jTjCD66rPFrPf97g2D8Gb-MjoNuXqSGmaBVAn0,472 +requests_oauthlib/compliance_fixes/ebay.py,sha256=bLaGRBG4Pkoac2V4gme1-3pbRYcxvfAGyPID7K6xIZ8,890 +requests_oauthlib/compliance_fixes/facebook.py,sha256=V6_2BQnyBKcwTWDRqDTbJQcJj4Rd_XMTAlDgGiFTQf4,1119 +requests_oauthlib/compliance_fixes/fitbit.py,sha256=rgA4MrYKHqeNOfCGL0pdG3kgum6rzh1QMvgIHL_EOAE,905 +requests_oauthlib/compliance_fixes/instagram.py,sha256=LO55o9VrmeegJOCzz2-QqMlneXS6YY_33ulA_-cG1pI,948 +requests_oauthlib/compliance_fixes/mailchimp.py,sha256=nTdyttxiTxdAzbC72XZb1_uV4ebVOUvQDpDbAmBTqvQ,757 +requests_oauthlib/compliance_fixes/plentymarkets.py,sha256=HnROrco-Z9EvGEjHw3fn-7UxxC8G2GKB8Yjda3es7Yw,796 +requests_oauthlib/compliance_fixes/slack.py,sha256=YDz9DQ3ukNgbyy1X4JIPAhTKKMwMf8R8THxnzhm79zE,1453 +requests_oauthlib/compliance_fixes/weibo.py,sha256=yMvmc2xGWsaxZZNT3XYdJhDuxXJ99N6rO0n3d2cP9MA,444 +requests_oauthlib/oauth1_auth.py,sha256=yq-3SxMvPa1Ux-dm_a2mC32TDKdGGT2XJJuu3d4QtRo,3737 +requests_oauthlib/oauth1_session.py,sha256=JPThdYoaDbc-I6aH7-UZKvFZ6B1ySvBlUkMqa6TcLpE,17055 +requests_oauthlib/oauth2_auth.py,sha256=uYT7ueHDG9TYR73yIFCX8TbquOz44p21lQYPCVw4gn8,1548 +requests_oauthlib/oauth2_session.py,sha256=lIuu5PbiVYfSrEiJ38H4QWh0maKEqwVUc3sU6hXXXmE,21992 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/REQUESTED b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/WHEEL b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..0b18a281107a0448a9980396d9d324ea2aa7a7f8 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.37.1) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/top_level.txt b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..55d4f9073fa48c720fbd093474efb8b1e18bcf02 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/requests_oauthlib-1.3.1.dist-info/top_level.txt @@ -0,0 +1 @@ +requests_oauthlib diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/_compat.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/_compat.py new file mode 100644 index 0000000000000000000000000000000000000000..49a55392a7e40324a2b8698bfcffe93f419ea14b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/_compat.py @@ -0,0 +1,89 @@ +import sys + +from sentry_sdk._types import MYPY + +if MYPY: + from typing import Optional + from typing import Tuple + from typing import Any + from typing import Type + from typing import TypeVar + + 
T = TypeVar("T") + + +PY2 = sys.version_info[0] == 2 + +if PY2: + import urlparse # noqa + + text_type = unicode # noqa + + string_types = (str, text_type) + number_types = (int, long, float) # noqa + int_types = (int, long) # noqa + iteritems = lambda x: x.iteritems() # noqa: B301 + + def implements_str(cls): + # type: (T) -> T + cls.__unicode__ = cls.__str__ + cls.__str__ = lambda x: unicode(x).encode("utf-8") # noqa + return cls + + exec("def reraise(tp, value, tb=None):\n raise tp, value, tb") + + +else: + import urllib.parse as urlparse # noqa + + text_type = str + string_types = (text_type,) # type: Tuple[type] + number_types = (int, float) # type: Tuple[type, type] + int_types = (int,) # noqa + iteritems = lambda x: x.items() + + def implements_str(x): + # type: (T) -> T + return x + + def reraise(tp, value, tb=None): + # type: (Optional[Type[BaseException]], Optional[BaseException], Optional[Any]) -> None + assert value is not None + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + + +def with_metaclass(meta, *bases): + # type: (Any, *Any) -> Any + class MetaClass(type): + def __new__(metacls, name, this_bases, d): + # type: (Any, Any, Any, Any) -> Any + return meta(name, bases, d) + + return type.__new__(MetaClass, "temporary_class", (), {}) + + +def check_thread_support(): + # type: () -> None + try: + from uwsgi import opt # type: ignore + except ImportError: + return + + # When `threads` is passed in as a uwsgi option, + # `enable-threads` is implied on. + if "threads" in opt: + return + + if str(opt.get("enable-threads", "0")).lower() in ("false", "off", "no", "0"): + from warnings import warn + + warn( + Warning( + "We detected the use of uwsgi with disabled threads. " + "This will cause issues with the transport you are " + "trying to use. Please enable threading for uwsgi. " + '(Add the "enable-threads" flag).' + ) + ) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/_functools.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/_functools.py new file mode 100644 index 0000000000000000000000000000000000000000..8dcf79caaab38dbd1ad922ea1503fc1d16e2ac06 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/_functools.py @@ -0,0 +1,66 @@ +""" +A backport of Python 3 functools to Python 2/3. The only important change +we rely upon is that `update_wrapper` handles AttributeError gracefully. 
+""" + +from functools import partial + +from sentry_sdk._types import MYPY + +if MYPY: + from typing import Any + from typing import Callable + + +WRAPPER_ASSIGNMENTS = ( + "__module__", + "__name__", + "__qualname__", + "__doc__", + "__annotations__", +) +WRAPPER_UPDATES = ("__dict__",) + + +def update_wrapper( + wrapper, wrapped, assigned=WRAPPER_ASSIGNMENTS, updated=WRAPPER_UPDATES +): + # type: (Any, Any, Any, Any) -> Any + """Update a wrapper function to look like the wrapped function + + wrapper is the function to be updated + wrapped is the original function + assigned is a tuple naming the attributes assigned directly + from the wrapped function to the wrapper function (defaults to + functools.WRAPPER_ASSIGNMENTS) + updated is a tuple naming the attributes of the wrapper that + are updated with the corresponding attribute from the wrapped + function (defaults to functools.WRAPPER_UPDATES) + """ + for attr in assigned: + try: + value = getattr(wrapped, attr) + except AttributeError: + pass + else: + setattr(wrapper, attr, value) + for attr in updated: + getattr(wrapper, attr).update(getattr(wrapped, attr, {})) + # Issue #17482: set __wrapped__ last so we don't inadvertently copy it + # from the wrapped function when updating __dict__ + wrapper.__wrapped__ = wrapped + # Return the wrapper so this can be used as a decorator via partial() + return wrapper + + +def wraps(wrapped, assigned=WRAPPER_ASSIGNMENTS, updated=WRAPPER_UPDATES): + # type: (Callable[..., Any], Any, Any) -> Callable[[Callable[..., Any]], Callable[..., Any]] + """Decorator factory to apply update_wrapper() to a wrapper function + + Returns a decorator that invokes update_wrapper() with the decorated + function as the wrapper argument and the arguments to wraps() as the + remaining arguments. Default arguments are as for update_wrapper(). + This is a convenience function to simplify applying partial() to + update_wrapper(). + """ + return partial(update_wrapper, wrapped=wrapped, assigned=assigned, updated=updated) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/attachments.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/attachments.py new file mode 100644 index 0000000000000000000000000000000000000000..b7b6b0b45b73766ddf114b59181db27960ed2c28 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/attachments.py @@ -0,0 +1,55 @@ +import os +import mimetypes + +from sentry_sdk._types import MYPY +from sentry_sdk.envelope import Item, PayloadRef + +if MYPY: + from typing import Optional, Union, Callable + + +class Attachment(object): + def __init__( + self, + bytes=None, # type: Union[None, bytes, Callable[[], bytes]] + filename=None, # type: Optional[str] + path=None, # type: Optional[str] + content_type=None, # type: Optional[str] + add_to_transactions=False, # type: bool + ): + # type: (...) 
-> None
+        if bytes is None and path is None:
+            raise TypeError("path or raw bytes required for attachment")
+        if filename is None and path is not None:
+            filename = os.path.basename(path)
+        if filename is None:
+            raise TypeError("filename is required for attachment")
+        if content_type is None:
+            content_type = mimetypes.guess_type(filename)[0]
+        self.bytes = bytes
+        self.filename = filename
+        self.path = path
+        self.content_type = content_type
+        self.add_to_transactions = add_to_transactions
+
+    def to_envelope_item(self):
+        # type: () -> Item
+        """Returns an envelope item for this attachment."""
+        payload = None  # type: Union[None, PayloadRef, bytes]
+        if self.bytes is not None:
+            if callable(self.bytes):
+                payload = self.bytes()
+            else:
+                payload = self.bytes
+        else:
+            payload = PayloadRef(path=self.path)
+        return Item(
+            payload=payload,
+            type="attachment",
+            content_type=self.content_type,
+            filename=self.filename,
+        )
+
+    def __repr__(self):
+        # type: () -> str
+        return "<Attachment %r>" % (self.filename,)
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/consts.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/consts.py
new file mode 100644
index 0000000000000000000000000000000000000000..1624934b28f06ad325540e9c37635658a558526a
--- /dev/null
+++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/consts.py
@@ -0,0 +1,110 @@
+from sentry_sdk._types import MYPY
+
+if MYPY:
+    import sentry_sdk
+
+    from typing import Optional
+    from typing import Callable
+    from typing import Union
+    from typing import List
+    from typing import Type
+    from typing import Dict
+    from typing import Any
+    from typing import Sequence
+    from typing_extensions import TypedDict
+
+    from sentry_sdk.integrations import Integration
+
+    from sentry_sdk._types import (
+        BreadcrumbProcessor,
+        Event,
+        EventProcessor,
+        TracesSampler,
+    )
+
+    # Experiments are feature flags to enable and disable certain unstable SDK
+    # functionality. Changing them from the defaults (`None`) in production
+    # code is highly discouraged. They are not subject to any stability
+    # guarantees such as the ones from semantic versioning.
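+    #
+    # (Added for clarity; not upstream code.) A hedged sketch of how an
+    # experiment flag would be passed, via the ``_experiments`` argument to
+    # ``sentry_sdk.init`` (the DSN below is a placeholder)::
+    #
+    #     import sentry_sdk
+    #     sentry_sdk.init(
+    #         dsn="https://key@example.ingest.sentry.io/0",
+    #         _experiments={"max_spans": 500},
+    #     )
+    #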
+ Experiments = TypedDict( + "Experiments", + { + "max_spans": Optional[int], + "record_sql_params": Optional[bool], + "smart_transaction_trimming": Optional[bool], + "propagate_tracestate": Optional[bool], + "custom_measurements": Optional[bool], + }, + total=False, + ) + +DEFAULT_QUEUE_SIZE = 100 +DEFAULT_MAX_BREADCRUMBS = 100 + + +# This type exists to trick mypy and PyCharm into thinking `init` and `Client` +# take these arguments (even though they take opaque **kwargs) +class ClientConstructor(object): + def __init__( + self, + dsn=None, # type: Optional[str] + with_locals=True, # type: bool + max_breadcrumbs=DEFAULT_MAX_BREADCRUMBS, # type: int + release=None, # type: Optional[str] + environment=None, # type: Optional[str] + server_name=None, # type: Optional[str] + shutdown_timeout=2, # type: float + integrations=[], # type: Sequence[Integration] # noqa: B006 + in_app_include=[], # type: List[str] # noqa: B006 + in_app_exclude=[], # type: List[str] # noqa: B006 + default_integrations=True, # type: bool + dist=None, # type: Optional[str] + transport=None, # type: Optional[Union[sentry_sdk.transport.Transport, Type[sentry_sdk.transport.Transport], Callable[[Event], None]]] + transport_queue_size=DEFAULT_QUEUE_SIZE, # type: int + sample_rate=1.0, # type: float + send_default_pii=False, # type: bool + http_proxy=None, # type: Optional[str] + https_proxy=None, # type: Optional[str] + ignore_errors=[], # type: List[Union[type, str]] # noqa: B006 + request_bodies="medium", # type: str + before_send=None, # type: Optional[EventProcessor] + before_breadcrumb=None, # type: Optional[BreadcrumbProcessor] + debug=False, # type: bool + attach_stacktrace=False, # type: bool + ca_certs=None, # type: Optional[str] + propagate_traces=True, # type: bool + traces_sample_rate=None, # type: Optional[float] + traces_sampler=None, # type: Optional[TracesSampler] + auto_enabling_integrations=True, # type: bool + auto_session_tracking=True, # type: bool + send_client_reports=True, # type: bool + _experiments={}, # type: Experiments # noqa: B006 + ): + # type: (...) 
-> None + pass + + +def _get_default_options(): + # type: () -> Dict[str, Any] + import inspect + + if hasattr(inspect, "getfullargspec"): + getargspec = inspect.getfullargspec + else: + getargspec = inspect.getargspec # type: ignore + + a = getargspec(ClientConstructor.__init__) + defaults = a.defaults or () + return dict(zip(a.args[-len(defaults) :], defaults)) + + +DEFAULT_OPTIONS = _get_default_options() +del _get_default_options + + +VERSION = "1.7.2" +SDK_INFO = { + "name": "sentry.python", + "version": VERSION, + "packages": [{"name": "pypi:sentry-sdk", "version": VERSION}], +} diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/debug.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/debug.py new file mode 100644 index 0000000000000000000000000000000000000000..fe8ae50cead3d8e1551427df36fc82393c8594f7 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/debug.py @@ -0,0 +1,44 @@ +import sys +import logging + +from sentry_sdk import utils +from sentry_sdk.hub import Hub +from sentry_sdk.utils import logger +from sentry_sdk.client import _client_init_debug +from logging import LogRecord + + +class _HubBasedClientFilter(logging.Filter): + def filter(self, record): + # type: (LogRecord) -> bool + if _client_init_debug.get(False): + return True + hub = Hub.current + if hub is not None and hub.client is not None: + return hub.client.options["debug"] + return False + + +def init_debug_support(): + # type: () -> None + if not logger.handlers: + configure_logger() + configure_debug_hub() + + +def configure_logger(): + # type: () -> None + _handler = logging.StreamHandler(sys.stderr) + _handler.setFormatter(logging.Formatter(" [sentry] %(levelname)s: %(message)s")) + logger.addHandler(_handler) + logger.setLevel(logging.DEBUG) + logger.addFilter(_HubBasedClientFilter()) + + +def configure_debug_hub(): + # type: () -> None + def _get_debug_hub(): + # type: () -> Hub + return Hub.current + + utils._get_debug_hub = _get_debug_hub diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/scope.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/scope.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a2dc7a8db2775c035c9f74913b59570b47a85c --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/scope.py @@ -0,0 +1,507 @@ +from copy import copy +from collections import deque +from itertools import chain + +from sentry_sdk._functools import wraps +from sentry_sdk._types import MYPY +from sentry_sdk.utils import logger, capture_internal_exceptions +from sentry_sdk.tracing import Transaction +from sentry_sdk.attachments import Attachment + +if MYPY: + from typing import Any + from typing import Dict + from typing import Optional + from typing import Deque + from typing import List + from typing import Callable + from typing import TypeVar + + from sentry_sdk._types import ( + Breadcrumb, + Event, + EventProcessor, + ErrorProcessor, + ExcInfo, + Hint, + Type, + ) + + from sentry_sdk.tracing import Span + from sentry_sdk.session import Session + + F = TypeVar("F", bound=Callable[..., Any]) + T = TypeVar("T") + + +global_event_processors = [] # type: List[EventProcessor] + + +def add_global_event_processor(processor): + # type: (EventProcessor) -> None + global_event_processors.append(processor) + + +def _attr_setter(fn): + # type: (Any) -> Any + 
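+    # (Added for clarity; not upstream code.) This returns a write-only
+    # property: assigning e.g. ``scope.level = "warning"`` runs the decorated
+    # setter, while reading the attribute raises ``AttributeError`` because no
+    # getter is defined.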
return property(fset=fn, doc=fn.__doc__) + + +def _disable_capture(fn): + # type: (F) -> F + @wraps(fn) + def wrapper(self, *args, **kwargs): + # type: (Any, *Dict[str, Any], **Any) -> Any + if not self._should_capture: + return + try: + self._should_capture = False + return fn(self, *args, **kwargs) + finally: + self._should_capture = True + + return wrapper # type: ignore + + +class Scope(object): + """The scope holds extra information that should be sent with all + events that belong to it. + """ + + # NOTE: Even though it should not happen, the scope needs to not crash when + # accessed by multiple threads. It's fine if it's full of races, but those + # races should never make the user application crash. + # + # The same needs to hold for any accesses of the scope the SDK makes. + + __slots__ = ( + "_level", + "_name", + "_fingerprint", + # note that for legacy reasons, _transaction is the transaction *name*, + # not a Transaction object (the object is stored in _span) + "_transaction", + "_transaction_info", + "_user", + "_tags", + "_contexts", + "_extras", + "_breadcrumbs", + "_event_processors", + "_error_processors", + "_should_capture", + "_span", + "_session", + "_attachments", + "_force_auto_session_tracking", + ) + + def __init__(self): + # type: () -> None + self._event_processors = [] # type: List[EventProcessor] + self._error_processors = [] # type: List[ErrorProcessor] + + self._name = None # type: Optional[str] + self.clear() + + def clear(self): + # type: () -> None + """Clears the entire scope.""" + self._level = None # type: Optional[str] + self._fingerprint = None # type: Optional[List[str]] + self._transaction = None # type: Optional[str] + self._transaction_info = {} # type: Dict[str, str] + self._user = None # type: Optional[Dict[str, Any]] + + self._tags = {} # type: Dict[str, Any] + self._contexts = {} # type: Dict[str, Dict[str, Any]] + self._extras = {} # type: Dict[str, Any] + self._attachments = [] # type: List[Attachment] + + self.clear_breadcrumbs() + self._should_capture = True + + self._span = None # type: Optional[Span] + self._session = None # type: Optional[Session] + self._force_auto_session_tracking = None # type: Optional[bool] + + @_attr_setter + def level(self, value): + # type: (Optional[str]) -> None + """When set this overrides the level. Deprecated in favor of set_level.""" + self._level = value + + def set_level(self, value): + # type: (Optional[str]) -> None + """Sets the level for the scope.""" + self._level = value + + @_attr_setter + def fingerprint(self, value): + # type: (Optional[List[str]]) -> None + """When set this overrides the default fingerprint.""" + self._fingerprint = value + + @property + def transaction(self): + # type: () -> Any + # would be type: () -> Optional[Transaction], see https://github.com/python/mypy/issues/3004 + """Return the transaction (root span) in the scope, if any.""" + + # there is no span/transaction on the scope + if self._span is None: + return None + + # there is an orphan span on the scope + if self._span.containing_transaction is None: + return None + + # there is either a transaction (which is its own containing + # transaction) or a non-orphan span on the scope + return self._span.containing_transaction + + @transaction.setter + def transaction(self, value): + # type: (Any) -> None + # would be type: (Optional[str]) -> None, see https://github.com/python/mypy/issues/3004 + """When set this forces a specific transaction name to be set. 
+ + Deprecated: use set_transaction_name instead.""" + + # XXX: the docstring above is misleading. The implementation of + # apply_to_event prefers an existing value of event.transaction over + # anything set in the scope. + # XXX: note that with the introduction of the Scope.transaction getter, + # there is a semantic and type mismatch between getter and setter. The + # getter returns a Transaction, the setter sets a transaction name. + # Without breaking version compatibility, we could make the setter set a + # transaction name or transaction (self._span) depending on the type of + # the value argument. + + logger.warning( + "Assigning to scope.transaction directly is deprecated: use scope.set_transaction_name() instead." + ) + self._transaction = value + if self._span and self._span.containing_transaction: + self._span.containing_transaction.name = value + + def set_transaction_name(self, name, source=None): + # type: (str, Optional[str]) -> None + """Set the transaction name and optionally the transaction source.""" + self._transaction = name + + if self._span and self._span.containing_transaction: + self._span.containing_transaction.name = name + if source: + self._span.containing_transaction.source = source + + if source: + self._transaction_info["source"] = source + + @_attr_setter + def user(self, value): + # type: (Optional[Dict[str, Any]]) -> None + """When set a specific user is bound to the scope. Deprecated in favor of set_user.""" + self.set_user(value) + + def set_user(self, value): + # type: (Optional[Dict[str, Any]]) -> None + """Sets a user for the scope.""" + self._user = value + if self._session is not None: + self._session.update(user=value) + + @property + def span(self): + # type: () -> Optional[Span] + """Get/set current tracing span or transaction.""" + return self._span + + @span.setter + def span(self, span): + # type: (Optional[Span]) -> None + self._span = span + # XXX: this differs from the implementation in JS, there Scope.setSpan + # does not set Scope._transactionName. + if isinstance(span, Transaction): + transaction = span + if transaction.name: + self._transaction = transaction.name + + def set_tag( + self, + key, # type: str + value, # type: Any + ): + # type: (...) -> None + """Sets a tag for a key to a specific value.""" + self._tags[key] = value + + def remove_tag( + self, key # type: str + ): + # type: (...) -> None + """Removes a specific tag.""" + self._tags.pop(key, None) + + def set_context( + self, + key, # type: str + value, # type: Dict[str, Any] + ): + # type: (...) -> None + """Binds a context at a certain key to a specific value.""" + self._contexts[key] = value + + def remove_context( + self, key # type: str + ): + # type: (...) -> None + """Removes a context.""" + self._contexts.pop(key, None) + + def set_extra( + self, + key, # type: str + value, # type: Any + ): + # type: (...) -> None + """Sets an extra key to a specific value.""" + self._extras[key] = value + + def remove_extra( + self, key # type: str + ): + # type: (...) -> None + """Removes a specific extra key.""" + self._extras.pop(key, None) + + def clear_breadcrumbs(self): + # type: () -> None + """Clears breadcrumb buffer.""" + self._breadcrumbs = deque() # type: Deque[Breadcrumb] + + def add_attachment( + self, + bytes=None, # type: Optional[bytes] + filename=None, # type: Optional[str] + path=None, # type: Optional[str] + content_type=None, # type: Optional[str] + add_to_transactions=False, # type: bool + ): + # type: (...) 
-> None + """Adds an attachment to future events sent.""" + self._attachments.append( + Attachment( + bytes=bytes, + path=path, + filename=filename, + content_type=content_type, + add_to_transactions=add_to_transactions, + ) + ) + + def add_event_processor( + self, func # type: EventProcessor + ): + # type: (...) -> None + """Register a scope local event processor on the scope. + + :param func: This function behaves like `before_send.` + """ + if len(self._event_processors) > 20: + logger.warning( + "Too many event processors on scope! Clearing list to free up some memory: %r", + self._event_processors, + ) + del self._event_processors[:] + + self._event_processors.append(func) + + def add_error_processor( + self, + func, # type: ErrorProcessor + cls=None, # type: Optional[Type[BaseException]] + ): + # type: (...) -> None + """Register a scope local error processor on the scope. + + :param func: A callback that works similar to an event processor but is invoked with the original exception info triple as second argument. + + :param cls: Optionally, only process exceptions of this type. + """ + if cls is not None: + cls_ = cls # For mypy. + real_func = func + + def func(event, exc_info): + # type: (Event, ExcInfo) -> Optional[Event] + try: + is_inst = isinstance(exc_info[1], cls_) + except Exception: + is_inst = False + if is_inst: + return real_func(event, exc_info) + return event + + self._error_processors.append(func) + + @_disable_capture + def apply_to_event( + self, + event, # type: Event + hint, # type: Hint + ): + # type: (...) -> Optional[Event] + """Applies the information contained on the scope to the given event.""" + + def _drop(event, cause, ty): + # type: (Dict[str, Any], Any, str) -> Optional[Any] + logger.info("%s (%s) dropped event (%s)", ty, cause, event) + return None + + is_transaction = event.get("type") == "transaction" + + # put all attachments into the hint. This lets callbacks play around + # with attachments. We also later pull this out of the hint when we + # create the envelope. 
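+        #
+        # (Added for clarity; not upstream code.) A hedged sketch: an event
+        # processor or ``before_send`` callback could filter the attachments
+        # it finds in the hint, for example::
+        #
+        #     def before_send(event, hint):
+        #         hint["attachments"] = [
+        #             a for a in hint.get("attachments", [])
+        #             if a.content_type != "application/octet-stream"
+        #         ]
+        #         return event
+        #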
+ attachments_to_send = hint.get("attachments") or [] + for attachment in self._attachments: + if not is_transaction or attachment.add_to_transactions: + attachments_to_send.append(attachment) + hint["attachments"] = attachments_to_send + + if self._level is not None: + event["level"] = self._level + + if not is_transaction: + event.setdefault("breadcrumbs", {}).setdefault("values", []).extend( + self._breadcrumbs + ) + + if event.get("user") is None and self._user is not None: + event["user"] = self._user + + if event.get("transaction") is None and self._transaction is not None: + event["transaction"] = self._transaction + + if event.get("transaction_info") is None and self._transaction_info is not None: + event["transaction_info"] = self._transaction_info + + if event.get("fingerprint") is None and self._fingerprint is not None: + event["fingerprint"] = self._fingerprint + + if self._extras: + event.setdefault("extra", {}).update(self._extras) + + if self._tags: + event.setdefault("tags", {}).update(self._tags) + + if self._contexts: + event.setdefault("contexts", {}).update(self._contexts) + + if self._span is not None: + contexts = event.setdefault("contexts", {}) + if not contexts.get("trace"): + contexts["trace"] = self._span.get_trace_context() + + exc_info = hint.get("exc_info") + if exc_info is not None: + for error_processor in self._error_processors: + new_event = error_processor(event, exc_info) + if new_event is None: + return _drop(event, error_processor, "error processor") + event = new_event + + for event_processor in chain(global_event_processors, self._event_processors): + new_event = event + with capture_internal_exceptions(): + new_event = event_processor(event, hint) + if new_event is None: + return _drop(event, event_processor, "event processor") + event = new_event + + return event + + def update_from_scope(self, scope): + # type: (Scope) -> None + if scope._level is not None: + self._level = scope._level + if scope._fingerprint is not None: + self._fingerprint = scope._fingerprint + if scope._transaction is not None: + self._transaction = scope._transaction + if scope._transaction_info is not None: + self._transaction_info.update(scope._transaction_info) + if scope._user is not None: + self._user = scope._user + if scope._tags: + self._tags.update(scope._tags) + if scope._contexts: + self._contexts.update(scope._contexts) + if scope._extras: + self._extras.update(scope._extras) + if scope._breadcrumbs: + self._breadcrumbs.extend(scope._breadcrumbs) + if scope._span: + self._span = scope._span + if scope._attachments: + self._attachments.extend(scope._attachments) + + def update_from_kwargs( + self, + user=None, # type: Optional[Any] + level=None, # type: Optional[str] + extras=None, # type: Optional[Dict[str, Any]] + contexts=None, # type: Optional[Dict[str, Any]] + tags=None, # type: Optional[Dict[str, str]] + fingerprint=None, # type: Optional[List[str]] + ): + # type: (...) 
-> None + if level is not None: + self._level = level + if user is not None: + self._user = user + if extras is not None: + self._extras.update(extras) + if contexts is not None: + self._contexts.update(contexts) + if tags is not None: + self._tags.update(tags) + if fingerprint is not None: + self._fingerprint = fingerprint + + def __copy__(self): + # type: () -> Scope + rv = object.__new__(self.__class__) # type: Scope + + rv._level = self._level + rv._name = self._name + rv._fingerprint = self._fingerprint + rv._transaction = self._transaction + rv._transaction_info = dict(self._transaction_info) + rv._user = self._user + + rv._tags = dict(self._tags) + rv._contexts = dict(self._contexts) + rv._extras = dict(self._extras) + + rv._breadcrumbs = copy(self._breadcrumbs) + rv._event_processors = list(self._event_processors) + rv._error_processors = list(self._error_processors) + + rv._should_capture = self._should_capture + rv._span = self._span + rv._session = self._session + rv._force_auto_session_tracking = self._force_auto_session_tracking + rv._attachments = list(self._attachments) + + return rv + + def __repr__(self): + # type: () -> str + return "<%s id=%s name=%s>" % ( + self.__class__.__name__, + hex(id(self)), + self._name, + ) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/serializer.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/serializer.py new file mode 100644 index 0000000000000000000000000000000000000000..e657f6b2b84104badad8a6d4f98c2d4310ba8ff8 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/serializer.py @@ -0,0 +1,468 @@ +import sys +import math + +from datetime import datetime + +from sentry_sdk.utils import ( + AnnotatedValue, + capture_internal_exception, + disable_capture_event, + format_timestamp, + json_dumps, + safe_repr, + strip_string, +) + +import sentry_sdk.utils + +from sentry_sdk._compat import text_type, PY2, string_types, number_types, iteritems + +from sentry_sdk._types import MYPY + +if MYPY: + from datetime import timedelta + + from types import TracebackType + + from typing import Any + from typing import Callable + from typing import ContextManager + from typing import Dict + from typing import List + from typing import Optional + from typing import Tuple + from typing import Type + from typing import Union + + from sentry_sdk._types import NotImplementedType, Event + + Span = Dict[str, Any] + + ReprProcessor = Callable[[Any, Dict[str, Any]], Union[NotImplementedType, str]] + Segment = Union[str, int] + + +if PY2: + # Importing ABCs from collections is deprecated, and will stop working in 3.8 + # https://github.com/python/cpython/blob/master/Lib/collections/__init__.py#L49 + from collections import Mapping, Sequence, Set + + serializable_str_types = string_types + +else: + # New in 3.3 + # https://docs.python.org/3/library/collections.abc.html + from collections.abc import Mapping, Sequence, Set + + # Bytes are technically not strings in Python 3, but we can serialize them + serializable_str_types = (str, bytes) + + +# Maximum length of JSON-serialized event payloads that can be safely sent +# before the server may reject the event due to its size. This is not intended +# to reflect actual values defined server-side, but rather only be an upper +# bound for events sent by the SDK. +# +# Can be overwritten if wanting to send more bytes, e.g. with a custom server. 
+# When changing this, keep in mind that events may be a little bit larger than
+# this value due to attached metadata, so keep the number conservative.
+MAX_EVENT_BYTES = 10**6
+
+MAX_DATABAG_DEPTH = 5
+MAX_DATABAG_BREADTH = 10
+CYCLE_MARKER = "<cyclic>"
+
+
+global_repr_processors = []  # type: List[ReprProcessor]
+
+
+def add_global_repr_processor(processor):
+    # type: (ReprProcessor) -> None
+    global_repr_processors.append(processor)
+
+
+class Memo(object):
+    __slots__ = ("_ids", "_objs")
+
+    def __init__(self):
+        # type: () -> None
+        self._ids = {}  # type: Dict[int, Any]
+        self._objs = []  # type: List[Any]
+
+    def memoize(self, obj):
+        # type: (Any) -> ContextManager[bool]
+        self._objs.append(obj)
+        return self
+
+    def __enter__(self):
+        # type: () -> bool
+        obj = self._objs[-1]
+        if id(obj) in self._ids:
+            return True
+        else:
+            self._ids[id(obj)] = obj
+            return False
+
+    def __exit__(
+        self,
+        ty,  # type: Optional[Type[BaseException]]
+        value,  # type: Optional[BaseException]
+        tb,  # type: Optional[TracebackType]
+    ):
+        # type: (...) -> None
+        self._ids.pop(id(self._objs.pop()), None)
+
+
+def serialize(event, smart_transaction_trimming=False, **kwargs):
+    # type: (Event, bool, **Any) -> Event
+    memo = Memo()
+    path = []  # type: List[Segment]
+    meta_stack = []  # type: List[Dict[str, Any]]
+    span_description_bytes = []  # type: List[int]
+
+    def _annotate(**meta):
+        # type: (**Any) -> None
+        while len(meta_stack) <= len(path):
+            try:
+                segment = path[len(meta_stack) - 1]
+                node = meta_stack[-1].setdefault(text_type(segment), {})
+            except IndexError:
+                node = {}
+
+            meta_stack.append(node)
+
+        meta_stack[-1].setdefault("", {}).update(meta)
+
+    def _should_repr_strings():
+        # type: () -> Optional[bool]
+        """
+        By default non-serializable objects are going through
+        safe_repr(). For certain places in the event (local vars) we
+        want to repr() even things that are JSON-serializable to
+        make their type more apparent. For example, it's useful to
+        see the difference between a unicode-string and a bytestring
+        when viewing a stacktrace.
+
+        For container-types we still don't do anything different.
+        Generally we just try to make the Sentry UI present exactly
+        what a pretty-printed repr would look like.
+
+        :returns: `True` if we are somewhere in frame variables, and `False` if
+            we are in a position where we will never encounter frame variables
+            when recursing (for example, we're in `event.extra`). `None` if we
+            are not (yet) in frame variables, but might encounter them when
+            recursing (e.g. we're in `event.exception`)
+        """
+        try:
+            p0 = path[0]
+            if p0 == "stacktrace" and path[1] == "frames" and path[3] == "vars":
+                return True
+
+            if (
+                p0 in ("threads", "exception")
+                and path[1] == "values"
+                and path[3] == "stacktrace"
+                and path[4] == "frames"
+                and path[6] == "vars"
+            ):
+                return True
+        except IndexError:
+            return None
+
+        return False
+
+    def _is_databag():
+        # type: () -> Optional[bool]
+        """
+        A databag is any value that we need to trim.
+
+        :returns: Works like `_should_repr_strings()`. `True` for "yes",
+        `False` for "no", `None` for "maybe soon".
+ """ + try: + rv = _should_repr_strings() + if rv in (True, None): + return rv + + p0 = path[0] + if p0 == "request" and path[1] == "data": + return True + + if p0 == "breadcrumbs" and path[1] == "values": + path[2] + return True + + if p0 == "extra": + return True + + except IndexError: + return None + + return False + + def _serialize_node( + obj, # type: Any + is_databag=None, # type: Optional[bool] + should_repr_strings=None, # type: Optional[bool] + segment=None, # type: Optional[Segment] + remaining_breadth=None, # type: Optional[int] + remaining_depth=None, # type: Optional[int] + ): + # type: (...) -> Any + if segment is not None: + path.append(segment) + + try: + with memo.memoize(obj) as result: + if result: + return CYCLE_MARKER + + return _serialize_node_impl( + obj, + is_databag=is_databag, + should_repr_strings=should_repr_strings, + remaining_depth=remaining_depth, + remaining_breadth=remaining_breadth, + ) + except BaseException: + capture_internal_exception(sys.exc_info()) + + if is_databag: + return "" + + return None + finally: + if segment is not None: + path.pop() + del meta_stack[len(path) + 1 :] + + def _flatten_annotated(obj): + # type: (Any) -> Any + if isinstance(obj, AnnotatedValue): + _annotate(**obj.metadata) + obj = obj.value + return obj + + def _serialize_node_impl( + obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth + ): + # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any + if should_repr_strings is None: + should_repr_strings = _should_repr_strings() + + if is_databag is None: + is_databag = _is_databag() + + if is_databag and remaining_depth is None: + remaining_depth = MAX_DATABAG_DEPTH + if is_databag and remaining_breadth is None: + remaining_breadth = MAX_DATABAG_BREADTH + + obj = _flatten_annotated(obj) + + if remaining_depth is not None and remaining_depth <= 0: + _annotate(rem=[["!limit", "x"]]) + if is_databag: + return _flatten_annotated(strip_string(safe_repr(obj))) + return None + + if is_databag and global_repr_processors: + hints = {"memo": memo, "remaining_depth": remaining_depth} + for processor in global_repr_processors: + result = processor(obj, hints) + if result is not NotImplemented: + return _flatten_annotated(result) + + sentry_repr = getattr(type(obj), "__sentry_repr__", None) + + if obj is None or isinstance(obj, (bool, number_types)): + if should_repr_strings or ( + isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj)) + ): + return safe_repr(obj) + else: + return obj + + elif callable(sentry_repr): + return sentry_repr(obj) + + elif isinstance(obj, datetime): + return ( + text_type(format_timestamp(obj)) + if not should_repr_strings + else safe_repr(obj) + ) + + elif isinstance(obj, Mapping): + # Create temporary copy here to avoid calling too much code that + # might mutate our dictionary while we're still iterating over it. 
+ obj = dict(iteritems(obj)) + + rv_dict = {} # type: Dict[str, Any] + i = 0 + + for k, v in iteritems(obj): + if remaining_breadth is not None and i >= remaining_breadth: + _annotate(len=len(obj)) + break + + str_k = text_type(k) + v = _serialize_node( + v, + segment=str_k, + should_repr_strings=should_repr_strings, + is_databag=is_databag, + remaining_depth=remaining_depth - 1 + if remaining_depth is not None + else None, + remaining_breadth=remaining_breadth, + ) + rv_dict[str_k] = v + i += 1 + + return rv_dict + + elif not isinstance(obj, serializable_str_types) and isinstance( + obj, (Set, Sequence) + ): + rv_list = [] + + for i, v in enumerate(obj): + if remaining_breadth is not None and i >= remaining_breadth: + _annotate(len=len(obj)) + break + + rv_list.append( + _serialize_node( + v, + segment=i, + should_repr_strings=should_repr_strings, + is_databag=is_databag, + remaining_depth=remaining_depth - 1 + if remaining_depth is not None + else None, + remaining_breadth=remaining_breadth, + ) + ) + + return rv_list + + if should_repr_strings: + obj = safe_repr(obj) + else: + if isinstance(obj, bytes): + obj = obj.decode("utf-8", "replace") + + if not isinstance(obj, string_types): + obj = safe_repr(obj) + + # Allow span descriptions to be longer than other strings. + # + # For database auto-instrumented spans, the description contains + # potentially long SQL queries that are most useful when not truncated. + # Because arbitrarily large events may be discarded by the server as a + # protection mechanism, we dynamically limit the description length + # later in _truncate_span_descriptions. + if ( + smart_transaction_trimming + and len(path) == 3 + and path[0] == "spans" + and path[-1] == "description" + ): + span_description_bytes.append(len(obj)) + return obj + return _flatten_annotated(strip_string(obj)) + + def _truncate_span_descriptions(serialized_event, event, excess_bytes): + # type: (Event, Event, int) -> None + """ + Modifies serialized_event in-place trying to remove excess_bytes from + span descriptions. The original event is used read-only to access the + span timestamps (represented as RFC3399-formatted strings in + serialized_event). + + It uses heuristics to prioritize preserving the description of spans + that might be the most interesting ones in terms of understanding and + optimizing performance. + """ + # When truncating a description, preserve a small prefix. + min_length = 10 + + def shortest_duration_longest_description_first(args): + # type: (Tuple[int, Span]) -> Tuple[timedelta, int] + i, serialized_span = args + span = event["spans"][i] + now = datetime.utcnow() + start = span.get("start_timestamp") or now + end = span.get("timestamp") or now + duration = end - start + description = serialized_span.get("description") or "" + return (duration, -len(description)) + + # Note: for simplicity we sort spans by exact duration and description + # length. If ever needed, we could have a more involved heuristic, e.g. + # replacing exact durations with "buckets" and/or looking at other span + # properties. + path.append("spans") + for i, span in sorted( + enumerate(serialized_event.get("spans") or []), + key=shortest_duration_longest_description_first, + ): + description = span.get("description") or "" + if len(description) <= min_length: + continue + excess_bytes -= len(description) - min_length + path.extend([i, "description"]) + # Note: the last time we call strip_string we could preserve a few + # more bytes up to a total length of MAX_EVENT_BYTES. 
Since that's + # not strictly required, we leave it out for now for simplicity. + span["description"] = _flatten_annotated( + strip_string(description, max_length=min_length) + ) + del path[-2:] + del meta_stack[len(path) + 1 :] + + if excess_bytes <= 0: + break + path.pop() + del meta_stack[len(path) + 1 :] + + disable_capture_event.set(True) + try: + rv = _serialize_node(event, **kwargs) + if meta_stack and isinstance(rv, dict): + rv["_meta"] = meta_stack[0] + + sum_span_description_bytes = sum(span_description_bytes) + if smart_transaction_trimming and sum_span_description_bytes > 0: + span_count = len(event.get("spans") or []) + # This is an upper bound of how many bytes all descriptions would + # consume if the usual string truncation in _serialize_node_impl + # would have taken place, not accounting for the metadata attached + # as event["_meta"]. + descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH + + # If by not truncating descriptions we ended up with more bytes than + # per the usual string truncation, check if the event is too large + # and we need to truncate some descriptions. + # + # This is guarded with an if statement to avoid JSON-encoding the + # event unnecessarily. + if sum_span_description_bytes > descriptions_budget_bytes: + original_bytes = len(json_dumps(rv)) + excess_bytes = original_bytes - MAX_EVENT_BYTES + if excess_bytes > 0: + # Event is too large, will likely be discarded by the + # server. Trim it down before sending. + _truncate_span_descriptions(rv, event, excess_bytes) + + # Span descriptions truncated, set or reset _meta. + # + # We run the same code earlier because we want to account + # for _meta when calculating original_bytes, the number of + # bytes in the JSON-encoded event. + if meta_stack and isinstance(rv, dict): + rv["_meta"] = meta_stack[0] + return rv + finally: + disable_capture_event.set(False) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/session.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/session.py new file mode 100644 index 0000000000000000000000000000000000000000..98a8c72cbba8f3ce664a1fbd1808e566cfc97193 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/session.py @@ -0,0 +1,174 @@ +import uuid +from datetime import datetime + +from sentry_sdk._types import MYPY +from sentry_sdk.utils import format_timestamp + +if MYPY: + from typing import Optional + from typing import Union + from typing import Any + from typing import Dict + + from sentry_sdk._types import SessionStatus + + +def _minute_trunc(ts): + # type: (datetime) -> datetime + return ts.replace(second=0, microsecond=0) + + +def _make_uuid( + val, # type: Union[str, uuid.UUID] +): + # type: (...) -> uuid.UUID + if isinstance(val, uuid.UUID): + return val + return uuid.UUID(val) + + +class Session(object): + def __init__( + self, + sid=None, # type: Optional[Union[str, uuid.UUID]] + did=None, # type: Optional[str] + timestamp=None, # type: Optional[datetime] + started=None, # type: Optional[datetime] + duration=None, # type: Optional[float] + status=None, # type: Optional[SessionStatus] + release=None, # type: Optional[str] + environment=None, # type: Optional[str] + user_agent=None, # type: Optional[str] + ip_address=None, # type: Optional[str] + errors=None, # type: Optional[int] + user=None, # type: Optional[Any] + session_mode="application", # type: str + ): + # type: (...) 
-> None
+        if sid is None:
+            sid = uuid.uuid4()
+        if started is None:
+            started = datetime.utcnow()
+        if status is None:
+            status = "ok"
+        self.status = status
+        self.did = None  # type: Optional[str]
+        self.started = started
+        self.release = None  # type: Optional[str]
+        self.environment = None  # type: Optional[str]
+        self.duration = None  # type: Optional[float]
+        self.user_agent = None  # type: Optional[str]
+        self.ip_address = None  # type: Optional[str]
+        self.session_mode = session_mode  # type: str
+        self.errors = 0
+
+        self.update(
+            sid=sid,
+            did=did,
+            timestamp=timestamp,
+            duration=duration,
+            release=release,
+            environment=environment,
+            user_agent=user_agent,
+            ip_address=ip_address,
+            errors=errors,
+            user=user,
+        )
+
+    @property
+    def truncated_started(self):
+        # type: (...) -> datetime
+        return _minute_trunc(self.started)
+
+    def update(
+        self,
+        sid=None,  # type: Optional[Union[str, uuid.UUID]]
+        did=None,  # type: Optional[str]
+        timestamp=None,  # type: Optional[datetime]
+        started=None,  # type: Optional[datetime]
+        duration=None,  # type: Optional[float]
+        status=None,  # type: Optional[SessionStatus]
+        release=None,  # type: Optional[str]
+        environment=None,  # type: Optional[str]
+        user_agent=None,  # type: Optional[str]
+        ip_address=None,  # type: Optional[str]
+        errors=None,  # type: Optional[int]
+        user=None,  # type: Optional[Any]
+    ):
+        # type: (...) -> None
+        # If a user is supplied we pull some data from it
+        if user:
+            if ip_address is None:
+                ip_address = user.get("ip_address")
+            if did is None:
+                did = user.get("id") or user.get("email") or user.get("username")
+
+        if sid is not None:
+            self.sid = _make_uuid(sid)
+        if did is not None:
+            self.did = str(did)
+        if timestamp is None:
+            timestamp = datetime.utcnow()
+        self.timestamp = timestamp
+        if started is not None:
+            self.started = started
+        if duration is not None:
+            self.duration = duration
+        if release is not None:
+            self.release = release
+        if environment is not None:
+            self.environment = environment
+        if ip_address is not None:
+            self.ip_address = ip_address
+        if user_agent is not None:
+            self.user_agent = user_agent
+        if errors is not None:
+            self.errors = errors
+
+        if status is not None:
+            self.status = status
+
+    def close(
+        self, status=None  # type: Optional[SessionStatus]
+    ):
+        # type: (...) -> Any
+        if status is None and self.status == "ok":
+            status = "exited"
+        if status is not None:
+            self.update(status=status)
+
+    def get_json_attrs(
+        self, with_user_info=True  # type: Optional[bool]
+    ):
+        # type: (...) -> Any
+        attrs = {}
+        if self.release is not None:
+            attrs["release"] = self.release
+        if self.environment is not None:
+            attrs["environment"] = self.environment
+        if with_user_info:
+            if self.ip_address is not None:
+                attrs["ip_address"] = self.ip_address
+            if self.user_agent is not None:
+                attrs["user_agent"] = self.user_agent
+        return attrs
+
+    def to_json(self):
+        # type: (...)
-> Any + rv = { + "sid": str(self.sid), + "init": True, + "started": format_timestamp(self.started), + "timestamp": format_timestamp(self.timestamp), + "status": self.status, + } # type: Dict[str, Any] + if self.errors: + rv["errors"] = self.errors + if self.did is not None: + rv["did"] = self.did + if self.duration is not None: + rv["duration"] = self.duration + attrs = self.get_json_attrs() + if attrs: + rv["attrs"] = attrs + return rv diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/sessions.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/sessions.py new file mode 100644 index 0000000000000000000000000000000000000000..4e4d21b89c748e8967dcb6b8846d62755cdc5a2a --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/sessions.py @@ -0,0 +1,175 @@ +import os +import time +from threading import Thread, Lock +from contextlib import contextmanager + +import sentry_sdk +from sentry_sdk.envelope import Envelope +from sentry_sdk.session import Session +from sentry_sdk._types import MYPY +from sentry_sdk.utils import format_timestamp + +if MYPY: + from typing import Any + from typing import Callable + from typing import Dict + from typing import Generator + from typing import List + from typing import Optional + from typing import Union + + +def is_auto_session_tracking_enabled(hub=None): + # type: (Optional[sentry_sdk.Hub]) -> Union[Any, bool, None] + """Utility function to find out if session tracking is enabled.""" + if hub is None: + hub = sentry_sdk.Hub.current + + should_track = hub.scope._force_auto_session_tracking + + if should_track is None: + client_options = hub.client.options if hub.client else {} + should_track = client_options.get("auto_session_tracking", False) + + return should_track + + +@contextmanager +def auto_session_tracking(hub=None, session_mode="application"): + # type: (Optional[sentry_sdk.Hub], str) -> Generator[None, None, None] + """Starts and stops a session automatically around a block.""" + if hub is None: + hub = sentry_sdk.Hub.current + should_track = is_auto_session_tracking_enabled(hub) + if should_track: + hub.start_session(session_mode=session_mode) + try: + yield + finally: + if should_track: + hub.end_session() + + +TERMINAL_SESSION_STATES = ("exited", "abnormal", "crashed") +MAX_ENVELOPE_ITEMS = 100 + + +def make_aggregate_envelope(aggregate_states, attrs): + # type: (Any, Any) -> Any + return {"attrs": dict(attrs), "aggregates": list(aggregate_states.values())} + + +class SessionFlusher(object): + def __init__( + self, + capture_func, # type: Callable[[Envelope], None] + flush_interval=60, # type: int + ): + # type: (...) -> None + self.capture_func = capture_func + self.flush_interval = flush_interval + self.pending_sessions = [] # type: List[Any] + self.pending_aggregates = {} # type: Dict[Any, Any] + self._thread = None # type: Optional[Thread] + self._thread_lock = Lock() + self._aggregate_lock = Lock() + self._thread_for_pid = None # type: Optional[int] + self._running = True + + def flush(self): + # type: (...) 
-> None + pending_sessions = self.pending_sessions + self.pending_sessions = [] + + with self._aggregate_lock: + pending_aggregates = self.pending_aggregates + self.pending_aggregates = {} + + envelope = Envelope() + for session in pending_sessions: + if len(envelope.items) == MAX_ENVELOPE_ITEMS: + self.capture_func(envelope) + envelope = Envelope() + + envelope.add_session(session) + + for (attrs, states) in pending_aggregates.items(): + if len(envelope.items) == MAX_ENVELOPE_ITEMS: + self.capture_func(envelope) + envelope = Envelope() + + envelope.add_sessions(make_aggregate_envelope(states, attrs)) + + if len(envelope.items) > 0: + self.capture_func(envelope) + + def _ensure_running(self): + # type: (...) -> None + if self._thread_for_pid == os.getpid() and self._thread is not None: + return None + with self._thread_lock: + if self._thread_for_pid == os.getpid() and self._thread is not None: + return None + + def _thread(): + # type: (...) -> None + while self._running: + time.sleep(self.flush_interval) + if self._running: + self.flush() + + thread = Thread(target=_thread) + thread.daemon = True + thread.start() + self._thread = thread + self._thread_for_pid = os.getpid() + return None + + def add_aggregate_session( + self, session # type: Session + ): + # type: (...) -> None + # NOTE on `session.did`: + # the protocol can deal with buckets that have a distinct-id, however + # in practice we expect the python SDK to have an extremely high cardinality + # here, effectively making aggregation useless, therefore we do not + # aggregate per-did. + + # For this part we can get away with using the global interpreter lock + with self._aggregate_lock: + attrs = session.get_json_attrs(with_user_info=False) + primary_key = tuple(sorted(attrs.items())) + secondary_key = session.truncated_started # (, session.did) + states = self.pending_aggregates.setdefault(primary_key, {}) + state = states.setdefault(secondary_key, {}) + + if "started" not in state: + state["started"] = format_timestamp(session.truncated_started) + # if session.did is not None: + # state["did"] = session.did + if session.status == "crashed": + state["crashed"] = state.get("crashed", 0) + 1 + elif session.status == "abnormal": + state["abnormal"] = state.get("abnormal", 0) + 1 + elif session.errors > 0: + state["errored"] = state.get("errored", 0) + 1 + else: + state["exited"] = state.get("exited", 0) + 1 + + def add_session( + self, session # type: Session + ): + # type: (...) -> None + if session.session_mode == "request": + self.add_aggregate_session(session) + else: + self.pending_sessions.append(session.to_json()) + self._ensure_running() + + def kill(self): + # type: (...) -> None + self._running = False + + def __del__(self): + # type: (...) 
-> None + self.kill() diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/transport.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/transport.py new file mode 100644 index 0000000000000000000000000000000000000000..fca6fa8aec10a7cdcafb3be7bfda7915feafd5a4 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/transport.py @@ -0,0 +1,531 @@ +from __future__ import print_function + +import io +import urllib3 # type: ignore +import certifi +import gzip +import time + +from datetime import datetime, timedelta +from collections import defaultdict + +from sentry_sdk.utils import Dsn, logger, capture_internal_exceptions, json_dumps +from sentry_sdk.worker import BackgroundWorker +from sentry_sdk.envelope import Envelope, Item, PayloadRef + +from sentry_sdk._types import MYPY + +if MYPY: + from typing import Any + from typing import Callable + from typing import Dict + from typing import Iterable + from typing import Optional + from typing import Tuple + from typing import Type + from typing import Union + from typing import DefaultDict + + from urllib3.poolmanager import PoolManager # type: ignore + from urllib3.poolmanager import ProxyManager + + from sentry_sdk._types import Event, EndpointType + + DataCategory = Optional[str] + +try: + from urllib.request import getproxies +except ImportError: + from urllib import getproxies # type: ignore + + +class Transport(object): + """Baseclass for all transports. + + A transport is used to send an event to sentry. + """ + + parsed_dsn = None # type: Optional[Dsn] + + def __init__( + self, options=None # type: Optional[Dict[str, Any]] + ): + # type: (...) -> None + self.options = options + if options and options["dsn"] is not None and options["dsn"]: + self.parsed_dsn = Dsn(options["dsn"]) + else: + self.parsed_dsn = None + + def capture_event( + self, event # type: Event + ): + # type: (...) -> None + """ + This gets invoked with the event dictionary when an event should + be sent to sentry. + """ + raise NotImplementedError() + + def capture_envelope( + self, envelope # type: Envelope + ): + # type: (...) -> None + """ + Send an envelope to Sentry. + + Envelopes are a data container format that can hold any type of data + submitted to Sentry. We use it for transactions and sessions, but + regular "error" events should go through `capture_event` for backwards + compat. + """ + raise NotImplementedError() + + def flush( + self, + timeout, # type: float + callback=None, # type: Optional[Any] + ): + # type: (...) -> None + """Wait `timeout` seconds for the current events to be sent out.""" + pass + + def kill(self): + # type: () -> None + """Forcefully kills the transport.""" + pass + + def record_lost_event( + self, + reason, # type: str + data_category=None, # type: Optional[str] + item=None, # type: Optional[Item] + ): + # type: (...) -> None + """This increments a counter for event loss by reason and + data category. 
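+ + Illustrative only: later in this module ``HttpTransport.capture_event`` + calls ``self.record_lost_event("queue_overflow", data_category="error")`` + when its background queue is full; the base implementation here is a no-op.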
+ """ + return None + + def __del__(self): + # type: () -> None + try: + self.kill() + except Exception: + pass + + +def _parse_rate_limits(header, now=None): + # type: (Any, Optional[datetime]) -> Iterable[Tuple[DataCategory, datetime]] + if now is None: + now = datetime.utcnow() + + for limit in header.split(","): + try: + retry_after, categories, _ = limit.strip().split(":", 2) + retry_after = now + timedelta(seconds=int(retry_after)) + for category in categories and categories.split(";") or (None,): + yield category, retry_after + except (LookupError, ValueError): + continue + + +class HttpTransport(Transport): + """The default HTTP transport.""" + + def __init__( + self, options # type: Dict[str, Any] + ): + # type: (...) -> None + from sentry_sdk.consts import VERSION + + Transport.__init__(self, options) + assert self.parsed_dsn is not None + self.options = options # type: Dict[str, Any] + self._worker = BackgroundWorker(queue_size=options["transport_queue_size"]) + self._auth = self.parsed_dsn.to_auth("sentry.python/%s" % VERSION) + self._disabled_until = {} # type: Dict[DataCategory, datetime] + self._retry = urllib3.util.Retry() + self._discarded_events = defaultdict( + int + ) # type: DefaultDict[Tuple[str, str], int] + self._last_client_report_sent = time.time() + + self._pool = self._make_pool( + self.parsed_dsn, + http_proxy=options["http_proxy"], + https_proxy=options["https_proxy"], + ca_certs=options["ca_certs"], + ) + + from sentry_sdk import Hub + + self.hub_cls = Hub + + def record_lost_event( + self, + reason, # type: str + data_category=None, # type: Optional[str] + item=None, # type: Optional[Item] + ): + # type: (...) -> None + if not self.options["send_client_reports"]: + return + + quantity = 1 + if item is not None: + data_category = item.data_category + if data_category == "attachment": + # quantity of 0 is actually 1 as we do not want to count + # empty attachments as actually empty. + quantity = len(item.get_bytes()) or 1 + elif data_category is None: + raise TypeError("data category not provided") + + self._discarded_events[data_category, reason] += quantity + + def _update_rate_limits(self, response): + # type: (urllib3.HTTPResponse) -> None + + # new sentries with more rate limit insights. We honor this header + # no matter of the status code to update our internal rate limits. + header = response.headers.get("x-sentry-rate-limits") + if header: + logger.warning("Rate-limited via x-sentry-rate-limits") + self._disabled_until.update(_parse_rate_limits(header)) + + # old sentries only communicate global rate limit hits via the + # retry-after header on 429. This header can also be emitted on new + # sentries if a proxy in front wants to globally slow things down. + elif response.status == 429: + logger.warning("Rate-limited via 429") + self._disabled_until[None] = datetime.utcnow() + timedelta( + seconds=self._retry.get_retry_after(response) or 60 + ) + + def _send_request( + self, + body, # type: bytes + headers, # type: Dict[str, str] + endpoint_type="store", # type: EndpointType + envelope=None, # type: Optional[Envelope] + ): + # type: (...) 
-> None + + def record_loss(reason): + # type: (str) -> None + if envelope is None: + self.record_lost_event(reason, data_category="error") + else: + for item in envelope.items: + self.record_lost_event(reason, item=item) + + headers.update( + { + "User-Agent": str(self._auth.client), + "X-Sentry-Auth": str(self._auth.to_header()), + } + ) + try: + response = self._pool.request( + "POST", + str(self._auth.get_api_url(endpoint_type)), + body=body, + headers=headers, + ) + except Exception: + self.on_dropped_event("network") + record_loss("network_error") + raise + + try: + self._update_rate_limits(response) + + if response.status == 429: + # if we hit a 429. Something was rate limited but we already + # acted on this in `self._update_rate_limits`. Note that we + # do not want to record event loss here as we will have recorded + # an outcome in relay already. + self.on_dropped_event("status_429") + pass + + elif response.status >= 300 or response.status < 200: + logger.error( + "Unexpected status code: %s (body: %s)", + response.status, + response.data, + ) + self.on_dropped_event("status_{}".format(response.status)) + record_loss("network_error") + finally: + response.close() + + def on_dropped_event(self, reason): + # type: (str) -> None + return None + + def _fetch_pending_client_report(self, force=False, interval=60): + # type: (bool, int) -> Optional[Item] + if not self.options["send_client_reports"]: + return None + + if not (force or self._last_client_report_sent < time.time() - interval): + return None + + discarded_events = self._discarded_events + self._discarded_events = defaultdict(int) + self._last_client_report_sent = time.time() + + if not discarded_events: + return None + + return Item( + PayloadRef( + json={ + "timestamp": time.time(), + "discarded_events": [ + {"reason": reason, "category": category, "quantity": quantity} + for ( + (category, reason), + quantity, + ) in discarded_events.items() + ], + } + ), + type="client_report", + ) + + def _flush_client_reports(self, force=False): + # type: (bool) -> None + client_report = self._fetch_pending_client_report(force=force, interval=60) + if client_report is not None: + self.capture_envelope(Envelope(items=[client_report])) + + def _check_disabled(self, category): + # type: (str) -> bool + def _disabled(bucket): + # type: (Any) -> bool + ts = self._disabled_until.get(bucket) + return ts is not None and ts > datetime.utcnow() + + return _disabled(category) or _disabled(None) + + def _send_event( + self, event # type: Event + ): + # type: (...) -> None + + if self._check_disabled("error"): + self.on_dropped_event("self_rate_limits") + self.record_lost_event("ratelimit_backoff", data_category="error") + return None + + body = io.BytesIO() + with gzip.GzipFile(fileobj=body, mode="w") as f: + f.write(json_dumps(event)) + + assert self.parsed_dsn is not None + logger.debug( + "Sending event, type:%s level:%s event_id:%s project:%s host:%s" + % ( + event.get("type") or "null", + event.get("level") or "null", + event.get("event_id") or "null", + self.parsed_dsn.project_id, + self.parsed_dsn.host, + ) + ) + self._send_request( + body.getvalue(), + headers={"Content-Type": "application/json", "Content-Encoding": "gzip"}, + ) + return None + + def _send_envelope( + self, envelope # type: Envelope + ): + # type: (...) 
-> None + + # remove all items from the envelope which are over quota + new_items = [] + for item in envelope.items: + if self._check_disabled(item.data_category): + if item.data_category in ("transaction", "error", "default"): + self.on_dropped_event("self_rate_limits") + self.record_lost_event("ratelimit_backoff", item=item) + else: + new_items.append(item) + + # Since we're modifying the envelope here make a copy so that others + # that hold references do not see their envelope modified. + envelope = Envelope(headers=envelope.headers, items=new_items) + + if not envelope.items: + return None + + # since we're already in the business of sending out an envelope here + # check if we have one pending for the stats session envelopes so we + # can attach it to this enveloped scheduled for sending. This will + # currently typically attach the client report to the most recent + # session update. + client_report_item = self._fetch_pending_client_report(interval=30) + if client_report_item is not None: + envelope.items.append(client_report_item) + + body = io.BytesIO() + with gzip.GzipFile(fileobj=body, mode="w") as f: + envelope.serialize_into(f) + + assert self.parsed_dsn is not None + logger.debug( + "Sending envelope [%s] project:%s host:%s", + envelope.description, + self.parsed_dsn.project_id, + self.parsed_dsn.host, + ) + + self._send_request( + body.getvalue(), + headers={ + "Content-Type": "application/x-sentry-envelope", + "Content-Encoding": "gzip", + }, + endpoint_type="envelope", + envelope=envelope, + ) + return None + + def _get_pool_options(self, ca_certs): + # type: (Optional[Any]) -> Dict[str, Any] + return { + "num_pools": 2, + "cert_reqs": "CERT_REQUIRED", + "ca_certs": ca_certs or certifi.where(), + } + + def _in_no_proxy(self, parsed_dsn): + # type: (Dsn) -> bool + no_proxy = getproxies().get("no") + if not no_proxy: + return False + for host in no_proxy.split(","): + host = host.strip() + if parsed_dsn.host.endswith(host) or parsed_dsn.netloc.endswith(host): + return True + return False + + def _make_pool( + self, + parsed_dsn, # type: Dsn + http_proxy, # type: Optional[str] + https_proxy, # type: Optional[str] + ca_certs, # type: Optional[Any] + ): + # type: (...) -> Union[PoolManager, ProxyManager] + proxy = None + no_proxy = self._in_no_proxy(parsed_dsn) + + # try HTTPS first + if parsed_dsn.scheme == "https" and (https_proxy != ""): + proxy = https_proxy or (not no_proxy and getproxies().get("https")) + + # maybe fallback to HTTP proxy + if not proxy and (http_proxy != ""): + proxy = http_proxy or (not no_proxy and getproxies().get("http")) + + opts = self._get_pool_options(ca_certs) + + if proxy: + return urllib3.ProxyManager(proxy, **opts) + else: + return urllib3.PoolManager(**opts) + + def capture_event( + self, event # type: Event + ): + # type: (...) -> None + hub = self.hub_cls.current + + def send_event_wrapper(): + # type: () -> None + with hub: + with capture_internal_exceptions(): + self._send_event(event) + self._flush_client_reports() + + if not self._worker.submit(send_event_wrapper): + self.on_dropped_event("full_queue") + self.record_lost_event("queue_overflow", data_category="error") + + def capture_envelope( + self, envelope # type: Envelope + ): + # type: (...) 
-> None + hub = self.hub_cls.current + + def send_envelope_wrapper(): + # type: () -> None + with hub: + with capture_internal_exceptions(): + self._send_envelope(envelope) + self._flush_client_reports() + + if not self._worker.submit(send_envelope_wrapper): + self.on_dropped_event("full_queue") + for item in envelope.items: + self.record_lost_event("queue_overflow", item=item) + + def flush( + self, + timeout, # type: float + callback=None, # type: Optional[Any] + ): + # type: (...) -> None + logger.debug("Flushing HTTP transport") + + if timeout > 0: + self._worker.submit(lambda: self._flush_client_reports(force=True)) + self._worker.flush(timeout, callback) + + def kill(self): + # type: () -> None + logger.debug("Killing HTTP transport") + self._worker.kill() + + +class _FunctionTransport(Transport): + def __init__( + self, func # type: Callable[[Event], None] + ): + # type: (...) -> None + Transport.__init__(self) + self._func = func + + def capture_event( + self, event # type: Event + ): + # type: (...) -> None + self._func(event) + return None + + +def make_transport(options): + # type: (Dict[str, Any]) -> Optional[Transport] + ref_transport = options["transport"] + + # If no transport is given, we use the http transport class + if ref_transport is None: + transport_cls = HttpTransport # type: Type[Transport] + elif isinstance(ref_transport, Transport): + return ref_transport + elif isinstance(ref_transport, type) and issubclass(ref_transport, Transport): + transport_cls = ref_transport + elif callable(ref_transport): + return _FunctionTransport(ref_transport) # type: ignore + + # if a transport class is given only instantiate it if the dsn is not + # empty or None + if options["dsn"]: + return transport_cls(options) + + return None diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/utils.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ccac6e37e321c32db05057b47f056830ad1bd059 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/utils.py @@ -0,0 +1,1012 @@ +import base64 +import json +import linecache +import logging +import os +import sys +import threading +import subprocess +import re + +from datetime import datetime + +import sentry_sdk +from sentry_sdk._compat import urlparse, text_type, implements_str, PY2 + +from sentry_sdk._types import MYPY + +if MYPY: + from types import FrameType + from types import TracebackType + from typing import Any + from typing import Callable + from typing import Dict + from typing import ContextManager + from typing import Iterator + from typing import List + from typing import Optional + from typing import Set + from typing import Tuple + from typing import Union + from typing import Type + + from sentry_sdk._types import ExcInfo, EndpointType + + +epoch = datetime(1970, 1, 1) + + +# The logger is created here but initialized in the debug support module +logger = logging.getLogger("sentry_sdk.errors") + +MAX_STRING_LENGTH = 512 +BASE64_ALPHABET = re.compile(r"^[a-zA-Z0-9/+=]*$") + + +def json_dumps(data): + # type: (Any) -> bytes + """Serialize data into a compact JSON representation encoded as UTF-8.""" + return json.dumps(data, allow_nan=False, separators=(",", ":")).encode("utf-8") + + +def _get_debug_hub(): + # type: () -> Optional[sentry_sdk.Hub] + # This function is replaced by debug.py + pass + + +def get_default_release(): + # type: () -> 
Optional[str] + """Try to guess a default release.""" + release = os.environ.get("SENTRY_RELEASE") + if release: + return release + + with open(os.path.devnull, "w+") as null: + try: + release = ( + subprocess.Popen( + ["git", "rev-parse", "HEAD"], + stdout=subprocess.PIPE, + stderr=null, + stdin=null, + ) + .communicate()[0] + .strip() + .decode("utf-8") + ) + except (OSError, IOError): + pass + + if release: + return release + + for var in ( + "HEROKU_SLUG_COMMIT", + "SOURCE_VERSION", + "CODEBUILD_RESOLVED_SOURCE_VERSION", + "CIRCLE_SHA1", + "GAE_DEPLOYMENT_ID", + ): + release = os.environ.get(var) + if release: + return release + return None + + +class CaptureInternalException(object): + __slots__ = () + + def __enter__(self): + # type: () -> ContextManager[Any] + return self + + def __exit__(self, ty, value, tb): + # type: (Optional[Type[BaseException]], Optional[BaseException], Optional[TracebackType]) -> bool + if ty is not None and value is not None: + capture_internal_exception((ty, value, tb)) + + return True + + +_CAPTURE_INTERNAL_EXCEPTION = CaptureInternalException() + + +def capture_internal_exceptions(): + # type: () -> ContextManager[Any] + return _CAPTURE_INTERNAL_EXCEPTION + + +def capture_internal_exception(exc_info): + # type: (ExcInfo) -> None + hub = _get_debug_hub() + if hub is not None: + hub._capture_internal_exception(exc_info) + + +def to_timestamp(value): + # type: (datetime) -> float + return (value - epoch).total_seconds() + + +def format_timestamp(value): + # type: (datetime) -> str + return value.strftime("%Y-%m-%dT%H:%M:%S.%fZ") + + +def event_hint_with_exc_info(exc_info=None): + # type: (Optional[ExcInfo]) -> Dict[str, Optional[ExcInfo]] + """Creates a hint with the exc info filled in.""" + if exc_info is None: + exc_info = sys.exc_info() + else: + exc_info = exc_info_from_error(exc_info) + if exc_info[0] is None: + exc_info = None + return {"exc_info": exc_info} + + +class BadDsn(ValueError): + """Raised on invalid DSNs.""" + + +@implements_str +class Dsn(object): + """Represents a DSN.""" + + def __init__(self, value): + # type: (Union[Dsn, str]) -> None + if isinstance(value, Dsn): + self.__dict__ = dict(value.__dict__) + return + parts = urlparse.urlsplit(text_type(value)) + + if parts.scheme not in ("http", "https"): + raise BadDsn("Unsupported scheme %r" % parts.scheme) + self.scheme = parts.scheme + + if parts.hostname is None: + raise BadDsn("Missing hostname") + + self.host = parts.hostname + + if parts.port is None: + self.port = self.scheme == "https" and 443 or 80 # type: int + else: + self.port = parts.port + + if not parts.username: + raise BadDsn("Missing public key") + + self.public_key = parts.username + self.secret_key = parts.password + + path = parts.path.rsplit("/", 1) + + try: + self.project_id = text_type(int(path.pop())) + except (ValueError, TypeError): + raise BadDsn("Invalid project in DSN (%r)" % (parts.path or "")[1:]) + + self.path = "/".join(path) + "/" + + @property + def netloc(self): + # type: () -> str + """The netloc part of a DSN.""" + rv = self.host + if (self.scheme, self.port) not in (("http", 80), ("https", 443)): + rv = "%s:%s" % (rv, self.port) + return rv + + def to_auth(self, client=None): + # type: (Optional[Any]) -> Auth + """Returns the auth info object for this dsn.""" + return Auth( + scheme=self.scheme, + host=self.netloc, + path=self.path, + project_id=self.project_id, + public_key=self.public_key, + secret_key=self.secret_key, + client=client, + ) + + def __str__(self): + # type: () -> str + return 
"%s://%s%s@%s%s%s" % ( + self.scheme, + self.public_key, + self.secret_key and "@" + self.secret_key or "", + self.netloc, + self.path, + self.project_id, + ) + + +class Auth(object): + """Helper object that represents the auth info.""" + + def __init__( + self, + scheme, + host, + project_id, + public_key, + secret_key=None, + version=7, + client=None, + path="/", + ): + # type: (str, str, str, str, Optional[str], int, Optional[Any], str) -> None + self.scheme = scheme + self.host = host + self.path = path + self.project_id = project_id + self.public_key = public_key + self.secret_key = secret_key + self.version = version + self.client = client + + @property + def store_api_url(self): + # type: () -> str + """Returns the API url for storing events. + + Deprecated: use get_api_url instead. + """ + return self.get_api_url(type="store") + + def get_api_url( + self, type="store" # type: EndpointType + ): + # type: (...) -> str + """Returns the API url for storing events.""" + return "%s://%s%sapi/%s/%s/" % ( + self.scheme, + self.host, + self.path, + self.project_id, + type, + ) + + def to_header(self): + # type: () -> str + """Returns the auth header a string.""" + rv = [("sentry_key", self.public_key), ("sentry_version", self.version)] + if self.client is not None: + rv.append(("sentry_client", self.client)) + if self.secret_key is not None: + rv.append(("sentry_secret", self.secret_key)) + return "Sentry " + ", ".join("%s=%s" % (key, value) for key, value in rv) + + +class AnnotatedValue(object): + __slots__ = ("value", "metadata") + + def __init__(self, value, metadata): + # type: (Optional[Any], Dict[str, Any]) -> None + self.value = value + self.metadata = metadata + + +if MYPY: + from typing import TypeVar + + T = TypeVar("T") + Annotated = Union[AnnotatedValue, T] + + +def get_type_name(cls): + # type: (Optional[type]) -> Optional[str] + return getattr(cls, "__qualname__", None) or getattr(cls, "__name__", None) + + +def get_type_module(cls): + # type: (Optional[type]) -> Optional[str] + mod = getattr(cls, "__module__", None) + if mod not in (None, "builtins", "__builtins__"): + return mod + return None + + +def should_hide_frame(frame): + # type: (FrameType) -> bool + try: + mod = frame.f_globals["__name__"] + if mod.startswith("sentry_sdk."): + return True + except (AttributeError, KeyError): + pass + + for flag_name in "__traceback_hide__", "__tracebackhide__": + try: + if frame.f_locals[flag_name]: + return True + except Exception: + pass + + return False + + +def iter_stacks(tb): + # type: (Optional[TracebackType]) -> Iterator[TracebackType] + tb_ = tb # type: Optional[TracebackType] + while tb_ is not None: + if not should_hide_frame(tb_.tb_frame): + yield tb_ + tb_ = tb_.tb_next + + +def get_lines_from_file( + filename, # type: str + lineno, # type: int + loader=None, # type: Optional[Any] + module=None, # type: Optional[str] +): + # type: (...) 
-> Tuple[List[Annotated[str]], Optional[Annotated[str]], List[Annotated[str]]] + context_lines = 5 + source = None + if loader is not None and hasattr(loader, "get_source"): + try: + source_str = loader.get_source(module) # type: Optional[str] + except (ImportError, IOError): + source_str = None + if source_str is not None: + source = source_str.splitlines() + + if source is None: + try: + source = linecache.getlines(filename) + except (OSError, IOError): + return [], None, [] + + if not source: + return [], None, [] + + lower_bound = max(0, lineno - context_lines) + upper_bound = min(lineno + 1 + context_lines, len(source)) + + try: + pre_context = [ + strip_string(line.strip("\r\n")) for line in source[lower_bound:lineno] + ] + context_line = strip_string(source[lineno].strip("\r\n")) + post_context = [ + strip_string(line.strip("\r\n")) + for line in source[(lineno + 1) : upper_bound] + ] + return pre_context, context_line, post_context + except IndexError: + # the file may have changed since it was loaded into memory + return [], None, [] + + +def get_source_context( + frame, # type: FrameType + tb_lineno, # type: int +): + # type: (...) -> Tuple[List[Annotated[str]], Optional[Annotated[str]], List[Annotated[str]]] + try: + abs_path = frame.f_code.co_filename # type: Optional[str] + except Exception: + abs_path = None + try: + module = frame.f_globals["__name__"] + except Exception: + return [], None, [] + try: + loader = frame.f_globals["__loader__"] + except Exception: + loader = None + lineno = tb_lineno - 1 + if lineno is not None and abs_path: + return get_lines_from_file(abs_path, lineno, loader, module) + return [], None, [] + + +def safe_str(value): + # type: (Any) -> str + try: + return text_type(value) + except Exception: + return safe_repr(value) + + +if PY2: + + def safe_repr(value): + # type: (Any) -> str + try: + rv = repr(value).decode("utf-8", "replace") + + # At this point `rv` contains a bunch of literal escape codes, like + # this (exaggerated example): + # + # u"\\x2f" + # + # But we want to show this string as: + # + # u"/" + try: + # unicode-escape does this job, but can only decode latin1. So we + # attempt to encode in latin1. + return rv.encode("latin1").decode("unicode-escape") + except Exception: + # Since usually strings aren't latin1 this can break. In those + # cases we just give up. + return rv + except Exception: + # If e.g. 
the call to `repr` already fails + return "" + +else: + + def safe_repr(value): + # type: (Any) -> str + try: + return repr(value) + except Exception: + return "" + + +def filename_for_module(module, abs_path): + # type: (Optional[str], Optional[str]) -> Optional[str] + if not abs_path or not module: + return abs_path + + try: + if abs_path.endswith(".pyc"): + abs_path = abs_path[:-1] + + base_module = module.split(".", 1)[0] + if base_module == module: + return os.path.basename(abs_path) + + base_module_path = sys.modules[base_module].__file__ + if not base_module_path: + return abs_path + + return abs_path.split(base_module_path.rsplit(os.sep, 2)[0], 1)[-1].lstrip( + os.sep + ) + except Exception: + return abs_path + + +def serialize_frame(frame, tb_lineno=None, with_locals=True): + # type: (FrameType, Optional[int], bool) -> Dict[str, Any] + f_code = getattr(frame, "f_code", None) + if not f_code: + abs_path = None + function = None + else: + abs_path = frame.f_code.co_filename + function = frame.f_code.co_name + try: + module = frame.f_globals["__name__"] + except Exception: + module = None + + if tb_lineno is None: + tb_lineno = frame.f_lineno + + pre_context, context_line, post_context = get_source_context(frame, tb_lineno) + + rv = { + "filename": filename_for_module(module, abs_path) or None, + "abs_path": os.path.abspath(abs_path) if abs_path else None, + "function": function or "", + "module": module, + "lineno": tb_lineno, + "pre_context": pre_context, + "context_line": context_line, + "post_context": post_context, + } # type: Dict[str, Any] + if with_locals: + rv["vars"] = frame.f_locals + + return rv + + +def current_stacktrace(with_locals=True): + # type: (bool) -> Any + __tracebackhide__ = True + frames = [] + + f = sys._getframe() # type: Optional[FrameType] + while f is not None: + if not should_hide_frame(f): + frames.append(serialize_frame(f, with_locals=with_locals)) + f = f.f_back + + frames.reverse() + + return {"frames": frames} + + +def get_errno(exc_value): + # type: (BaseException) -> Optional[Any] + return getattr(exc_value, "errno", None) + + +def single_exception_from_error_tuple( + exc_type, # type: Optional[type] + exc_value, # type: Optional[BaseException] + tb, # type: Optional[TracebackType] + client_options=None, # type: Optional[Dict[str, Any]] + mechanism=None, # type: Optional[Dict[str, Any]] +): + # type: (...) 
-> Dict[str, Any] + if exc_value is not None: + errno = get_errno(exc_value) + else: + errno = None + + if errno is not None: + mechanism = mechanism or {"type": "generic"} + mechanism.setdefault("meta", {}).setdefault("errno", {}).setdefault( + "number", errno + ) + + if client_options is None: + with_locals = True + else: + with_locals = client_options["with_locals"] + + frames = [ + serialize_frame(tb.tb_frame, tb_lineno=tb.tb_lineno, with_locals=with_locals) + for tb in iter_stacks(tb) + ] + + rv = { + "module": get_type_module(exc_type), + "type": get_type_name(exc_type), + "value": safe_str(exc_value), + "mechanism": mechanism, + } + + if frames: + rv["stacktrace"] = {"frames": frames} + + return rv + + +HAS_CHAINED_EXCEPTIONS = hasattr(Exception, "__suppress_context__") + +if HAS_CHAINED_EXCEPTIONS: + + def walk_exception_chain(exc_info): + # type: (ExcInfo) -> Iterator[ExcInfo] + exc_type, exc_value, tb = exc_info + + seen_exceptions = [] + seen_exception_ids = set() # type: Set[int] + + while ( + exc_type is not None + and exc_value is not None + and id(exc_value) not in seen_exception_ids + ): + yield exc_type, exc_value, tb + + # Avoid hashing random types we don't know anything + # about. Use the list to keep a ref so that the `id` is + # not used for another object. + seen_exceptions.append(exc_value) + seen_exception_ids.add(id(exc_value)) + + if exc_value.__suppress_context__: + cause = exc_value.__cause__ + else: + cause = exc_value.__context__ + if cause is None: + break + exc_type = type(cause) + exc_value = cause + tb = getattr(cause, "__traceback__", None) + +else: + + def walk_exception_chain(exc_info): + # type: (ExcInfo) -> Iterator[ExcInfo] + yield exc_info + + +def exceptions_from_error_tuple( + exc_info, # type: ExcInfo + client_options=None, # type: Optional[Dict[str, Any]] + mechanism=None, # type: Optional[Dict[str, Any]] +): + # type: (...) 
-> List[Dict[str, Any]] + exc_type, exc_value, tb = exc_info + rv = [] + for exc_type, exc_value, tb in walk_exception_chain(exc_info): + rv.append( + single_exception_from_error_tuple( + exc_type, exc_value, tb, client_options, mechanism + ) + ) + + rv.reverse() + + return rv + + +def to_string(value): + # type: (str) -> str + try: + return text_type(value) + except UnicodeDecodeError: + return repr(value)[1:-1] + + +def iter_event_stacktraces(event): + # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + if "stacktrace" in event: + yield event["stacktrace"] + if "threads" in event: + for thread in event["threads"].get("values") or (): + if "stacktrace" in thread: + yield thread["stacktrace"] + if "exception" in event: + for exception in event["exception"].get("values") or (): + if "stacktrace" in exception: + yield exception["stacktrace"] + + +def iter_event_frames(event): + # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + for stacktrace in iter_event_stacktraces(event): + for frame in stacktrace.get("frames") or (): + yield frame + + +def handle_in_app(event, in_app_exclude=None, in_app_include=None): + # type: (Dict[str, Any], Optional[List[str]], Optional[List[str]]) -> Dict[str, Any] + for stacktrace in iter_event_stacktraces(event): + handle_in_app_impl( + stacktrace.get("frames"), + in_app_exclude=in_app_exclude, + in_app_include=in_app_include, + ) + + return event + + +def handle_in_app_impl(frames, in_app_exclude, in_app_include): + # type: (Any, Optional[List[str]], Optional[List[str]]) -> Optional[Any] + if not frames: + return None + + any_in_app = False + for frame in frames: + in_app = frame.get("in_app") + if in_app is not None: + if in_app: + any_in_app = True + continue + + module = frame.get("module") + if not module: + continue + elif _module_in_set(module, in_app_include): + frame["in_app"] = True + any_in_app = True + elif _module_in_set(module, in_app_exclude): + frame["in_app"] = False + + if not any_in_app: + for frame in frames: + if frame.get("in_app") is None: + frame["in_app"] = True + + return frames + + +def exc_info_from_error(error): + # type: (Union[BaseException, ExcInfo]) -> ExcInfo + if isinstance(error, tuple) and len(error) == 3: + exc_type, exc_value, tb = error + elif isinstance(error, BaseException): + tb = getattr(error, "__traceback__", None) + if tb is not None: + exc_type = type(error) + exc_value = error + else: + exc_type, exc_value, tb = sys.exc_info() + if exc_value is not error: + tb = None + exc_value = error + exc_type = type(error) + + else: + raise ValueError("Expected Exception object to report, got %s!" % type(error)) + + return exc_type, exc_value, tb + + +def event_from_exception( + exc_info, # type: Union[BaseException, ExcInfo] + client_options=None, # type: Optional[Dict[str, Any]] + mechanism=None, # type: Optional[Dict[str, Any]] +): + # type: (...) 
-> Tuple[Dict[str, Any], Dict[str, Any]] + exc_info = exc_info_from_error(exc_info) + hint = event_hint_with_exc_info(exc_info) + return ( + { + "level": "error", + "exception": { + "values": exceptions_from_error_tuple( + exc_info, client_options, mechanism + ) + }, + }, + hint, + ) + + +def _module_in_set(name, set): + # type: (str, Optional[List[str]]) -> bool + if not set: + return False + for item in set or (): + if item == name or name.startswith(item + "."): + return True + return False + + +def strip_string(value, max_length=None): + # type: (str, Optional[int]) -> Union[AnnotatedValue, str] + # TODO: read max_length from config + if not value: + return value + + if max_length is None: + # This is intentionally not just the default such that one can patch `MAX_STRING_LENGTH` and affect `strip_string`. + max_length = MAX_STRING_LENGTH + + length = len(value) + + if length > max_length: + return AnnotatedValue( + value=value[: max_length - 3] + "...", + metadata={ + "len": length, + "rem": [["!limit", "x", max_length - 3, max_length]], + }, + ) + return value + + +def _is_contextvars_broken(): + # type: () -> bool + """ + Returns whether gevent/eventlet have patched the stdlib in a way where thread locals are now more "correct" than contextvars. + """ + try: + import gevent # type: ignore + from gevent.monkey import is_object_patched # type: ignore + + # Get the MAJOR and MINOR version numbers of Gevent + version_tuple = tuple( + [int(part) for part in re.split(r"a|b|rc|\.", gevent.__version__)[:2]] + ) + if is_object_patched("threading", "local"): + # Gevent 20.9.0 depends on Greenlet 0.4.17 which natively handles switching + # context vars when greenlets are switched, so, Gevent 20.9.0+ is all fine. + # Ref: https://github.com/gevent/gevent/blob/83c9e2ae5b0834b8f84233760aabe82c3ba065b4/src/gevent/monkey.py#L604-L609 + # Gevent 20.5, that doesn't depend on Greenlet 0.4.17 with native support + # for contextvars, is able to patch both thread locals and contextvars, in + # that case, check if contextvars are effectively patched. + if ( + # Gevent 20.9.0+ + (sys.version_info >= (3, 7) and version_tuple >= (20, 9)) + # Gevent 20.5.0+ or Python < 3.7 + or (is_object_patched("contextvars", "ContextVar")) + ): + return False + + return True + except ImportError: + pass + + try: + from eventlet.patcher import is_monkey_patched # type: ignore + + if is_monkey_patched("thread"): + return True + except ImportError: + pass + + return False + + +def _make_threadlocal_contextvars(local): + # type: (type) -> type + class ContextVar(object): + # Super-limited impl of ContextVar + + def __init__(self, name): + # type: (str) -> None + self._name = name + self._local = local() + + def get(self, default): + # type: (Any) -> Any + return getattr(self._local, "value", default) + + def set(self, value): + # type: (Any) -> None + self._local.value = value + + return ContextVar + + +def _get_contextvars(): + # type: () -> Tuple[bool, type] + """ + Figure out the "right" contextvars installation to use. Returns a + `contextvars.ContextVar`-like class with a limited API. + + See https://docs.sentry.io/platforms/python/contextvars/ for more information. + """ + if not _is_contextvars_broken(): + # aiocontextvars is a PyPI package that ensures that the contextvars + # backport (also a PyPI package) works with asyncio under Python 3.6 + # + # Import it if available. + if sys.version_info < (3, 7): + # `aiocontextvars` is absolutely required for functional + # contextvars on Python 3.6. 
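+ # Illustrative note: if this import fails, the thread-local fallback + # returned at the end of this function is used instead, and a value set + # in one asyncio task is visible to every task on the same thread (the + # leak described in CONTEXTVARS_ERROR_MESSAGE below).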
+ try: + from aiocontextvars import ContextVar # noqa + + return True, ContextVar + except ImportError: + pass + else: + # On Python 3.7 contextvars are functional. + try: + from contextvars import ContextVar + + return True, ContextVar + except ImportError: + pass + + # Fall back to basic thread-local usage. + + from threading import local + + return False, _make_threadlocal_contextvars(local) + + +HAS_REAL_CONTEXTVARS, ContextVar = _get_contextvars() + +CONTEXTVARS_ERROR_MESSAGE = """ + +With asyncio/ASGI applications, the Sentry SDK requires a functional +installation of `contextvars` to avoid leaking scope/context data across +requests. + +Please refer to https://docs.sentry.io/platforms/python/contextvars/ for more information. +""" + + +def transaction_from_function(func): + # type: (Callable[..., Any]) -> Optional[str] + # Methods in Python 2 + try: + return "%s.%s.%s" % ( + func.im_class.__module__, # type: ignore + func.im_class.__name__, # type: ignore + func.__name__, + ) + except Exception: + pass + + func_qualname = ( + getattr(func, "__qualname__", None) or getattr(func, "__name__", None) or None + ) # type: Optional[str] + + if not func_qualname: + # No idea what it is + return None + + # Methods in Python 3 + # Functions + # Classes + try: + return "%s.%s" % (func.__module__, func_qualname) + except Exception: + pass + + # Possibly a lambda + return func_qualname + + +disable_capture_event = ContextVar("disable_capture_event") + + +class ServerlessTimeoutWarning(Exception): # noqa: N818 + """Raised when a serverless method is about to reach its timeout.""" + + pass + + +class TimeoutThread(threading.Thread): + """Creates a Thread which runs (sleeps) for a time duration equal to + waiting_time and raises a custom ServerlessTimeout exception. + """ + + def __init__(self, waiting_time, configured_timeout): + # type: (float, int) -> None + threading.Thread.__init__(self) + self.waiting_time = waiting_time + self.configured_timeout = configured_timeout + self._stop_event = threading.Event() + + def stop(self): + # type: () -> None + self._stop_event.set() + + def run(self): + # type: () -> None + + self._stop_event.wait(self.waiting_time) + + if self._stop_event.is_set(): + return + + integer_configured_timeout = int(self.configured_timeout) + + # Setting up the exact integer value of configured time(in seconds) + if integer_configured_timeout < self.configured_timeout: + integer_configured_timeout = integer_configured_timeout + 1 + + # Raising Exception after timeout duration is reached + raise ServerlessTimeoutWarning( + "WARNING : Function is expected to get timed out. Configured timeout duration = {} seconds.".format( + integer_configured_timeout + ) + ) + + +def to_base64(original): + # type: (str) -> Optional[str] + """ + Convert a string to base64, via UTF-8. Returns None on invalid input. + """ + base64_string = None + + try: + utf8_bytes = original.encode("UTF-8") + base64_bytes = base64.b64encode(utf8_bytes) + base64_string = base64_bytes.decode("UTF-8") + except Exception as err: + logger.warning("Unable to encode {orig} to base64:".format(orig=original), err) + + return base64_string + + +def from_base64(base64_string): + # type: (str) -> Optional[str] + """ + Convert a string from base64, via UTF-8. Returns None on invalid input. 
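+ + Illustrative round trip with ``to_base64`` (defined above): + + >>> from_base64(to_base64("hello")) + 'hello'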
+ """ + utf8_string = None + + try: + only_valid_chars = BASE64_ALPHABET.match(base64_string) + assert only_valid_chars + + base64_bytes = base64_string.encode("UTF-8") + utf8_bytes = base64.b64decode(base64_bytes) + utf8_string = utf8_bytes.decode("UTF-8") + except Exception as err: + logger.warning( + "Unable to decode {b64} from base64:".format(b64=base64_string), err + ) + + return utf8_string diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/worker.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/worker.py new file mode 100644 index 0000000000000000000000000000000000000000..310ba3bfb4758b5fb54f7aa731439c899d952ac3 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sentry_sdk/worker.py @@ -0,0 +1,133 @@ +import os +import threading + +from time import sleep, time +from sentry_sdk._compat import check_thread_support +from sentry_sdk._queue import Queue, FullError +from sentry_sdk.utils import logger +from sentry_sdk.consts import DEFAULT_QUEUE_SIZE + +from sentry_sdk._types import MYPY + +if MYPY: + from typing import Any + from typing import Optional + from typing import Callable + + +_TERMINATOR = object() + + +class BackgroundWorker(object): + def __init__(self, queue_size=DEFAULT_QUEUE_SIZE): + # type: (int) -> None + check_thread_support() + self._queue = Queue(queue_size) # type: Queue + self._lock = threading.Lock() + self._thread = None # type: Optional[threading.Thread] + self._thread_for_pid = None # type: Optional[int] + + @property + def is_alive(self): + # type: () -> bool + if self._thread_for_pid != os.getpid(): + return False + if not self._thread: + return False + return self._thread.is_alive() + + def _ensure_thread(self): + # type: () -> None + if not self.is_alive: + self.start() + + def _timed_queue_join(self, timeout): + # type: (float) -> bool + deadline = time() + timeout + queue = self._queue + + queue.all_tasks_done.acquire() + + try: + while queue.unfinished_tasks: + delay = deadline - time() + if delay <= 0: + return False + queue.all_tasks_done.wait(timeout=delay) + + return True + finally: + queue.all_tasks_done.release() + + def start(self): + # type: () -> None + with self._lock: + if not self.is_alive: + self._thread = threading.Thread( + target=self._target, name="raven-sentry.BackgroundWorker" + ) + self._thread.daemon = True + self._thread.start() + self._thread_for_pid = os.getpid() + + def kill(self): + # type: () -> None + """ + Kill worker thread. Returns immediately. Not useful for + waiting on shutdown for events, use `flush` for that. 
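+ + An illustrative shutdown sequence using only this class's own methods, + draining before killing: + + worker.flush(2.0) + worker.kill()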
+ """ + logger.debug("background worker got kill request") + with self._lock: + if self._thread: + try: + self._queue.put_nowait(_TERMINATOR) + except FullError: + logger.debug("background worker queue full, kill failed") + + self._thread = None + self._thread_for_pid = None + + def flush(self, timeout, callback=None): + # type: (float, Optional[Any]) -> None + logger.debug("background worker got flush request") + with self._lock: + if self.is_alive and timeout > 0.0: + self._wait_flush(timeout, callback) + logger.debug("background worker flushed") + + def _wait_flush(self, timeout, callback): + # type: (float, Optional[Any]) -> None + initial_timeout = min(0.1, timeout) + if not self._timed_queue_join(initial_timeout): + pending = self._queue.qsize() + 1 + logger.debug("%d event(s) pending on flush", pending) + if callback is not None: + callback(pending, timeout) + + if not self._timed_queue_join(timeout - initial_timeout): + pending = self._queue.qsize() + 1 + logger.error("flush timed out, dropped %s events", pending) + + def submit(self, callback): + # type: (Callable[[], None]) -> bool + self._ensure_thread() + try: + self._queue.put_nowait(callback) + return True + except FullError: + return False + + def _target(self): + # type: () -> None + while True: + callback = self._queue.get() + try: + if callback is _TERMINATOR: + break + try: + callback() + except Exception: + logger.error("Failed processing job", exc_info=True) + finally: + self._queue.task_done() + sleep(0) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/_min_dependencies.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/_min_dependencies.py new file mode 100644 index 0000000000000000000000000000000000000000..3e28d6bc7dc9813a2ab39095ba7fa6bfa50285de --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/_min_dependencies.py @@ -0,0 +1,70 @@ +"""All minimum dependencies for scikit-learn.""" +from collections import defaultdict +import platform +import argparse + + +# scipy and cython should be in sync with pyproject.toml + +# NumPy version should match oldest-supported-numpy for the minimum supported +# Python version. +# see: https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg +if platform.python_implementation() == "PyPy": + NUMPY_MIN_VERSION = "1.19.2" +else: + NUMPY_MIN_VERSION = "1.17.3" + +SCIPY_MIN_VERSION = "1.3.2" +JOBLIB_MIN_VERSION = "1.0.0" +THREADPOOLCTL_MIN_VERSION = "2.0.0" +PYTEST_MIN_VERSION = "5.0.1" +CYTHON_MIN_VERSION = "0.29.24" + + +# 'build' and 'install' are included to have structured metadata for CI.
+# It will NOT be included in setup's extras_require +# The values are (version_spec, comma separated tags) +dependent_packages = { + "numpy": (NUMPY_MIN_VERSION, "build, install"), + "scipy": (SCIPY_MIN_VERSION, "build, install"), + "joblib": (JOBLIB_MIN_VERSION, "install"), + "threadpoolctl": (THREADPOOLCTL_MIN_VERSION, "install"), + "cython": (CYTHON_MIN_VERSION, "build"), + "matplotlib": ("3.1.2", "benchmark, docs, examples, tests"), + "scikit-image": ("0.16.2", "docs, examples, tests"), + "pandas": ("1.0.5", "benchmark, docs, examples, tests"), + "seaborn": ("0.9.0", "docs, examples"), + "memory_profiler": ("0.57.0", "benchmark, docs"), + "pytest": (PYTEST_MIN_VERSION, "tests"), + "pytest-cov": ("2.9.0", "tests"), + "flake8": ("3.8.2", "tests"), + "black": ("22.3.0", "tests"), + "mypy": ("0.961", "tests"), + "pyamg": ("4.0.0", "tests"), + "sphinx": ("4.0.1", "docs"), + "sphinx-gallery": ("0.7.0", "docs"), + "numpydoc": ("1.2.0", "docs, tests"), + "Pillow": ("7.1.2", "docs"), + "sphinx-prompt": ("1.3.0", "docs"), + "sphinxext-opengraph": ("0.4.2", "docs"), + # XXX: Pin conda-lock to the latest released version (needs manual update + # from time to time) + "conda-lock": ("1.0.5", "maintenance"), +} + + +# create inverse mapping for setuptools +tag_to_packages: dict = defaultdict(list) +for package, (min_version, extras) in dependent_packages.items(): + for extra in extras.split(", "): + tag_to_packages[extra].append("{}>={}".format(package, min_version)) + + +# Used by CI to get the min dependencies +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Get min dependencies for a package") + + parser.add_argument("package", choices=dependent_packages) + args = parser.parse_args() + min_version = dependent_packages[args.package][0] + print(min_version) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/discriminant_analysis.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/discriminant_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..d06489fa8fd635551ba9cbfded485030930d22f3 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/discriminant_analysis.py @@ -0,0 +1,999 @@ +""" +Linear Discriminant Analysis and Quadratic Discriminant Analysis +""" + +# Authors: Clemens Brunner +# Martin Billinger +# Matthieu Perrot +# Mathieu Blondel + +# License: BSD 3-Clause + +import warnings +import numpy as np +from scipy import linalg +from scipy.special import expit +from numbers import Real + +from .base import BaseEstimator, TransformerMixin, ClassifierMixin +from .base import _ClassNamePrefixFeaturesOutMixin +from .linear_model._base import LinearClassifierMixin +from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance +from .utils.multiclass import unique_labels +from .utils.validation import check_is_fitted +from .utils.multiclass import check_classification_targets +from .utils.extmath import softmax +from .preprocessing import StandardScaler + + +__all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"] + + +def _cov(X, shrinkage=None, covariance_estimator=None): + """Estimate covariance matrix (using optional covariance_estimator). + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Input data. + + shrinkage : {'empirical', 'auto'} or float, default=None + Shrinkage parameter, possible values: + - None or 'empirical': no shrinkage (default). 
+ - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. + - float between 0 and 1: fixed shrinkage parameter. + + Shrinkage parameter is ignored if `covariance_estimator` + is not None. + + covariance_estimator : estimator, default=None + If not None, `covariance_estimator` is used to estimate + the covariance matrices instead of relying on the empirical + covariance estimator (with potential shrinkage). + The object should have a fit method and a ``covariance_`` attribute + like the estimators in :mod:`sklearn.covariance`. + If None, the shrinkage parameter drives the estimate. + + .. versionadded:: 0.24 + + Returns + ------- + s : ndarray of shape (n_features, n_features) + Estimated covariance matrix. + """ + if covariance_estimator is None: + shrinkage = "empirical" if shrinkage is None else shrinkage + if isinstance(shrinkage, str): + if shrinkage == "auto": + sc = StandardScaler() # standardize features + X = sc.fit_transform(X) + s = ledoit_wolf(X)[0] + # rescale + s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :] + elif shrinkage == "empirical": + s = empirical_covariance(X) + else: + raise ValueError("unknown shrinkage parameter") + elif isinstance(shrinkage, Real): + if shrinkage < 0 or shrinkage > 1: + raise ValueError("shrinkage parameter must be between 0 and 1") + s = shrunk_covariance(empirical_covariance(X), shrinkage) + else: + raise TypeError("shrinkage must be a float or a string") + else: + if shrinkage is not None and shrinkage != 0: + raise ValueError( + "covariance_estimator and shrinkage parameters " + "are not None. Only one of the two can be set." + ) + covariance_estimator.fit(X) + if not hasattr(covariance_estimator, "covariance_"): + raise ValueError( + "%s does not have a covariance_ attribute" + % covariance_estimator.__class__.__name__ + ) + s = covariance_estimator.covariance_ + return s + + +def _class_means(X, y): + """Compute class means. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Input data. + + y : array-like of shape (n_samples,) or (n_samples, n_targets) + Target values. + + Returns + ------- + means : array-like of shape (n_classes, n_features) + Class means. + """ + classes, y = np.unique(y, return_inverse=True) + cnt = np.bincount(y) + means = np.zeros(shape=(len(classes), X.shape[1])) + np.add.at(means, y, X) + means /= cnt[:, None] + return means + + +def _class_cov(X, y, priors, shrinkage=None, covariance_estimator=None): + """Compute weighted within-class covariance matrix. + + The per-class covariances are weighted by the class priors. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Input data. + + y : array-like of shape (n_samples,) or (n_samples, n_targets) + Target values. + + priors : array-like of shape (n_classes,) + Class priors. + + shrinkage : 'auto' or float, default=None + Shrinkage parameter, possible values: + - None: no shrinkage (default). + - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. + - float between 0 and 1: fixed shrinkage parameter. + + Shrinkage parameter is ignored if `covariance_estimator` is not None. + + covariance_estimator : estimator, default=None + If not None, `covariance_estimator` is used to estimate + the covariance matrices instead of relying on the empirical + covariance estimator (with potential shrinkage). + The object should have a fit method and a ``covariance_`` attribute + like the estimators in sklearn.covariance. + If None, the shrinkage parameter drives the estimate. + + ..
versionadded:: 0.24 + + Returns + ------- + cov : array-like of shape (n_features, n_features) + Weighted within-class covariance matrix + """ + classes = np.unique(y) + cov = np.zeros(shape=(X.shape[1], X.shape[1])) + for idx, group in enumerate(classes): + Xg = X[y == group, :] + cov += priors[idx] * np.atleast_2d(_cov(Xg, shrinkage, covariance_estimator)) + return cov + + +class LinearDiscriminantAnalysis( + _ClassNamePrefixFeaturesOutMixin, + LinearClassifierMixin, + TransformerMixin, + BaseEstimator, +): + """Linear Discriminant Analysis. + + A classifier with a linear decision boundary, generated by fitting class + conditional densities to the data and using Bayes' rule. + + The model fits a Gaussian density to each class, assuming that all classes + share the same covariance matrix. + + The fitted model can also be used to reduce the dimensionality of the input + by projecting it to the most discriminative directions, using the + `transform` method. + + .. versionadded:: 0.17 + *LinearDiscriminantAnalysis*. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + solver : {'svd', 'lsqr', 'eigen'}, default='svd' + Solver to use, possible values: + - 'svd': Singular value decomposition (default). + Does not compute the covariance matrix, therefore this solver is + recommended for data with a large number of features. + - 'lsqr': Least squares solution. + Can be combined with shrinkage or custom covariance estimator. + - 'eigen': Eigenvalue decomposition. + Can be combined with shrinkage or custom covariance estimator. + + shrinkage : 'auto' or float, default=None + Shrinkage parameter, possible values: + - None: no shrinkage (default). + - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. + - float between 0 and 1: fixed shrinkage parameter. + + This should be left to None if `covariance_estimator` is used. + Note that shrinkage works only with 'lsqr' and 'eigen' solvers. + + priors : array-like of shape (n_classes,), default=None + The class prior probabilities. By default, the class proportions are + inferred from the training data. + + n_components : int, default=None + Number of components (<= min(n_classes - 1, n_features)) for + dimensionality reduction. If None, will be set to + min(n_classes - 1, n_features). This parameter only affects the + `transform` method. + + store_covariance : bool, default=False + If True, explicitly compute the weighted within-class covariance + matrix when solver is 'svd'. The matrix is always computed + and stored for the other solvers. + + .. versionadded:: 0.17 + + tol : float, default=1.0e-4 + Absolute threshold for a singular value of X to be considered + significant, used to estimate the rank of X. Dimensions whose + singular values are non-significant are discarded. Only used if + solver is 'svd'. + + .. versionadded:: 0.17 + + covariance_estimator : covariance estimator, default=None + If not None, `covariance_estimator` is used to estimate + the covariance matrices instead of relying on the empirical + covariance estimator (with potential shrinkage). + The object should have a fit method and a ``covariance_`` attribute + like the estimators in :mod:`sklearn.covariance`. + if None the shrinkage parameter drives the estimate. + + This should be left to None if `shrinkage` is used. + Note that `covariance_estimator` works only with 'lsqr' and 'eigen' + solvers. + + .. versionadded:: 0.24 + + Attributes + ---------- + coef_ : ndarray of shape (n_features,) or (n_classes, n_features) + Weight vector(s). 
+ + intercept_ : ndarray of shape (n_classes,) + Intercept term. + + covariance_ : array-like of shape (n_features, n_features) + Weighted within-class covariance matrix. It corresponds to + `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the + samples in class `k`. The `C_k` are estimated using the (potentially + shrunk) biased estimator of covariance. If solver is 'svd', only + exists when `store_covariance` is True. + + explained_variance_ratio_ : ndarray of shape (n_components,) + Percentage of variance explained by each of the selected components. + If ``n_components`` is not set then all components are stored and the + sum of explained variances is equal to 1.0. Only available when eigen + or svd solver is used. + + means_ : array-like of shape (n_classes, n_features) + Class-wise means. + + priors_ : array-like of shape (n_classes,) + Class priors (sum to 1). + + scalings_ : array-like of shape (rank, n_classes - 1) + Scaling of the features in the space spanned by the class centroids. + Only available for 'svd' and 'eigen' solvers. + + xbar_ : array-like of shape (n_features,) + Overall mean. Only present if solver is 'svd'. + + classes_ : array-like of shape (n_classes,) + Unique class labels. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + See Also + -------- + QuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis + >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) + >>> y = np.array([1, 1, 1, 2, 2, 2]) + >>> clf = LinearDiscriminantAnalysis() + >>> clf.fit(X, y) + LinearDiscriminantAnalysis() + >>> print(clf.predict([[-0.8, -1]])) + [1] + """ + + def __init__( + self, + solver="svd", + shrinkage=None, + priors=None, + n_components=None, + store_covariance=False, + tol=1e-4, + covariance_estimator=None, + ): + self.solver = solver + self.shrinkage = shrinkage + self.priors = priors + self.n_components = n_components + self.store_covariance = store_covariance # used only in svd solver + self.tol = tol # used only in svd solver + self.covariance_estimator = covariance_estimator + + def _solve_lsqr(self, X, y, shrinkage, covariance_estimator): + """Least squares solver. + + The least squares solver computes a straightforward solution of the + optimal decision rule based directly on the discriminant functions. It + can only be used for classification (with any covariance estimator), + because + estimation of eigenvectors is not performed. Therefore, dimensionality + reduction with the transform is not supported. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data. + + y : array-like of shape (n_samples,) or (n_samples, n_classes) + Target values. + + shrinkage : 'auto', float or None + Shrinkage parameter, possible values: + - None: no shrinkage. + - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. + - float between 0 and 1: fixed shrinkage parameter. 
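The shrinkage options listed above map directly onto the public helpers that `_cov` builds on. Below is a minimal sketch (toy data; the fixed coefficient 0.2 is chosen purely for illustration) of what a float shrinkage value does to the empirical covariance; note that the 'auto' branch of `_cov` additionally standardizes the features before calling `ledoit_wolf` and rescales the result afterwards:

```python
import numpy as np
from sklearn.covariance import empirical_covariance, ledoit_wolf, shrunk_covariance

rng = np.random.RandomState(0)
X = rng.randn(50, 3)

emp = empirical_covariance(X)    # maximum-likelihood (biased) estimate
alpha = 0.2                      # fixed shrinkage coefficient in [0, 1]
shrunk = shrunk_covariance(emp, alpha)

# shrunk_covariance blends the empirical estimate with a scaled identity:
# (1 - alpha) * emp + alpha * trace(emp) / n_features * I
mu = np.trace(emp) / emp.shape[0]
manual = (1 - alpha) * emp + alpha * mu * np.eye(emp.shape[0])
assert np.allclose(shrunk, manual)

# shrinkage='auto' instead picks the coefficient via the Ledoit-Wolf formula
lw_cov, lw_alpha = ledoit_wolf(X)
print("Ledoit-Wolf shrinkage coefficient:", lw_alpha)
```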
+ + The shrinkage parameter is ignored if `covariance_estimator` is + not None. + + covariance_estimator : estimator, default=None + If not None, `covariance_estimator` is used to estimate + the covariance matrices instead of relying on the empirical + covariance estimator (with potential shrinkage). + The object should have a fit method and a ``covariance_`` attribute + like the estimators in :mod:`sklearn.covariance`. + If None, the shrinkage parameter drives the estimate. + + .. versionadded:: 0.24 + + Notes + ----- + This solver is based on [1]_, section 2.6.2, pp. 39-41. + + References + ---------- + .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification + (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN + 0-471-05669-3. + """ + self.means_ = _class_means(X, y) + self.covariance_ = _class_cov( + X, y, self.priors_, shrinkage, covariance_estimator + ) + self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T + self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log( + self.priors_ + ) + + def _solve_eigen(self, X, y, shrinkage, covariance_estimator): + """Eigenvalue solver. + + The eigenvalue solver computes the optimal solution of the Rayleigh + quotient (basically the ratio of between-class scatter to within-class + scatter). This solver supports both classification and + dimensionality reduction (with any covariance estimator). + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data. + + y : array-like of shape (n_samples,) or (n_samples, n_targets) + Target values. + + shrinkage : 'auto', float or None + Shrinkage parameter, possible values: + - None: no shrinkage. + - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. + - float between 0 and 1: fixed shrinkage constant. + + The shrinkage parameter is ignored if `covariance_estimator` is + not None. + + covariance_estimator : estimator, default=None + If not None, `covariance_estimator` is used to estimate + the covariance matrices instead of relying on the empirical + covariance estimator (with potential shrinkage). + The object should have a fit method and a ``covariance_`` attribute + like the estimators in :mod:`sklearn.covariance`. + If None, the shrinkage parameter drives the estimate. + + .. versionadded:: 0.24 + + Notes + ----- + This solver is based on [1]_, section 3.8.3, pp. 121-124. + + References + ---------- + .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification + (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN + 0-471-05669-3. + """ + self.means_ = _class_means(X, y) + self.covariance_ = _class_cov( + X, y, self.priors_, shrinkage, covariance_estimator + ) + + Sw = self.covariance_ # within scatter + St = _cov(X, shrinkage, covariance_estimator) # total scatter + Sb = St - Sw # between scatter + + evals, evecs = linalg.eigh(Sb, Sw) + self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1][ + : self._max_components + ] + evecs = evecs[:, np.argsort(evals)[::-1]] # sort eigenvectors + + self.scalings_ = evecs + self.coef_ = np.dot(self.means_, evecs).dot(evecs.T) + self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log( + self.priors_ + ) + + def _solve_svd(self, X, y): + """SVD solver. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data. + + y : array-like of shape (n_samples,) or (n_samples, n_targets) + Target values.
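The `_solve_eigen` body above reduces LDA to a generalized symmetric eigenproblem: the leading eigenvector of `Sb v = lambda Sw v` maximizes the Rayleigh quotient of between-class to within-class scatter. A self-contained sketch on hypothetical two-class data, using the same `scipy.linalg.eigh(Sb, Sw)` call (with equal class sizes and biased covariance estimates, `St = Sw + Sb` holds exactly):

```python
import numpy as np
from scipy import linalg
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(30, 2), rng.randn(30, 2) + [3, 1]])
y = np.array([0] * 30 + [1] * 30)
priors = np.array([0.5, 0.5])

# within-class scatter: prior-weighted average of per-class covariances
Sw = sum(p * empirical_covariance(X[y == k]) for k, p in enumerate(priors))
St = empirical_covariance(X)   # total scatter
Sb = St - Sw                   # between-class scatter

# generalized symmetric eigenproblem Sb v = lambda Sw v; the leading
# eigenvector maximizes the Rayleigh quotient v' Sb v / v' Sw v
evals, evecs = linalg.eigh(Sb, Sw)
w = evecs[:, np.argmax(evals)]
print("most discriminative direction:", w / np.linalg.norm(w))
```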
+ """ + n_samples, n_features = X.shape + n_classes = len(self.classes_) + + self.means_ = _class_means(X, y) + if self.store_covariance: + self.covariance_ = _class_cov(X, y, self.priors_) + + Xc = [] + for idx, group in enumerate(self.classes_): + Xg = X[y == group, :] + Xc.append(Xg - self.means_[idx]) + + self.xbar_ = np.dot(self.priors_, self.means_) + + Xc = np.concatenate(Xc, axis=0) + + # 1) within (univariate) scaling by with classes std-dev + std = Xc.std(axis=0) + # avoid division by zero in normalization + std[std == 0] = 1.0 + fac = 1.0 / (n_samples - n_classes) + + # 2) Within variance scaling + X = np.sqrt(fac) * (Xc / std) + # SVD of centered (within)scaled data + U, S, Vt = linalg.svd(X, full_matrices=False) + + rank = np.sum(S > self.tol) + # Scaling of within covariance is: V' 1/S + scalings = (Vt[:rank] / std).T / S[:rank] + fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1) + + # 3) Between variance scaling + # Scale weighted centers + X = np.dot( + ( + (np.sqrt((n_samples * self.priors_) * fac)) + * (self.means_ - self.xbar_).T + ).T, + scalings, + ) + # Centers are living in a space with n_classes-1 dim (maximum) + # Use SVD to find projection in the space spanned by the + # (n_classes) centers + _, S, Vt = linalg.svd(X, full_matrices=0) + + if self._max_components == 0: + self.explained_variance_ratio_ = np.empty((0,), dtype=S.dtype) + else: + self.explained_variance_ratio_ = (S**2 / np.sum(S**2))[ + : self._max_components + ] + + rank = np.sum(S > self.tol * S[0]) + self.scalings_ = np.dot(scalings, Vt.T[:, :rank]) + coef = np.dot(self.means_ - self.xbar_, self.scalings_) + self.intercept_ = -0.5 * np.sum(coef**2, axis=1) + np.log(self.priors_) + self.coef_ = np.dot(coef, self.scalings_.T) + self.intercept_ -= np.dot(self.xbar_, self.coef_.T) + + def fit(self, X, y): + """Fit the Linear Discriminant Analysis model. + + .. versionchanged:: 0.19 + *store_covariance* has been moved to main constructor. + + .. versionchanged:: 0.19 + *tol* has been moved to main constructor. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data. + + y : array-like of shape (n_samples,) + Target values. + + Returns + ------- + self : object + Fitted estimator. + """ + X, y = self._validate_data( + X, y, ensure_min_samples=2, dtype=[np.float64, np.float32] + ) + self.classes_ = unique_labels(y) + n_samples, _ = X.shape + n_classes = len(self.classes_) + + if n_samples == n_classes: + raise ValueError( + "The number of samples must be more than the number of classes." + ) + + if self.priors is None: # estimate priors from sample + _, y_t = np.unique(y, return_inverse=True) # non-negative ints + self.priors_ = np.bincount(y_t) / float(len(y)) + else: + self.priors_ = np.asarray(self.priors) + + if (self.priors_ < 0).any(): + raise ValueError("priors must be non-negative") + if not np.isclose(self.priors_.sum(), 1.0): + warnings.warn("The priors do not sum to 1. Renormalizing", UserWarning) + self.priors_ = self.priors_ / self.priors_.sum() + + # Maximum number of components no matter what n_components is + # specified: + max_components = min(len(self.classes_) - 1, X.shape[1]) + + if self.n_components is None: + self._max_components = max_components + else: + if self.n_components > max_components: + raise ValueError( + "n_components cannot be larger than min(n_features, n_classes - 1)." 
+ ) + self._max_components = self.n_components + + if self.solver == "svd": + if self.shrinkage is not None: + raise NotImplementedError("shrinkage not supported") + if self.covariance_estimator is not None: + raise ValueError( + "covariance estimator " + "is not supported " + "with svd solver. Try another solver" + ) + self._solve_svd(X, y) + elif self.solver == "lsqr": + self._solve_lsqr( + X, + y, + shrinkage=self.shrinkage, + covariance_estimator=self.covariance_estimator, + ) + elif self.solver == "eigen": + self._solve_eigen( + X, + y, + shrinkage=self.shrinkage, + covariance_estimator=self.covariance_estimator, + ) + else: + raise ValueError( + "unknown solver {} (valid solvers are 'svd', " + "'lsqr', and 'eigen').".format(self.solver) + ) + if self.classes_.size == 2: # treat binary case as a special case + self.coef_ = np.array( + self.coef_[1, :] - self.coef_[0, :], ndmin=2, dtype=X.dtype + ) + self.intercept_ = np.array( + self.intercept_[1] - self.intercept_[0], ndmin=1, dtype=X.dtype + ) + self._n_features_out = self._max_components + return self + + def transform(self, X): + """Project data to maximize class separation. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Input data. + + Returns + ------- + X_new : ndarray of shape (n_samples, n_components) or \ + (n_samples, min(rank, n_components)) + Transformed data. In the case of the 'svd' solver, the shape + is (n_samples, min(rank, n_components)). + """ + if self.solver == "lsqr": + raise NotImplementedError( + "transform not implemented for 'lsqr' solver (use 'svd' or 'eigen')." + ) + check_is_fitted(self) + + X = self._validate_data(X, reset=False) + if self.solver == "svd": + X_new = np.dot(X - self.xbar_, self.scalings_) + elif self.solver == "eigen": + X_new = np.dot(X, self.scalings_) + + return X_new[:, : self._max_components] + + def predict_proba(self, X): + """Estimate probability. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Input data. + + Returns + ------- + C : ndarray of shape (n_samples, n_classes) + Estimated probabilities. + """ + check_is_fitted(self) + + decision = self.decision_function(X) + if self.classes_.size == 2: + proba = expit(decision) + return np.vstack([1 - proba, proba]).T + else: + return softmax(decision) + + def predict_log_proba(self, X): + """Estimate log probability. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Input data. + + Returns + ------- + C : ndarray of shape (n_samples, n_classes) + Estimated log probabilities. + """ + prediction = self.predict_proba(X) + prediction[prediction == 0.0] += np.finfo(prediction.dtype).tiny + return np.log(prediction) + + def decision_function(self, X): + """Apply decision function to an array of samples. + + The decision function is equal (up to a constant factor) to the + log-posterior of the model, i.e. `log p(y = k | x)`. In a binary + classification setting this instead corresponds to the difference + `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Array of samples (test vectors). + + Returns + ------- + C : ndarray of shape (n_samples,) or (n_samples, n_classes) + Decision function values related to each class, per sample. + In the two-class case, the shape is (n_samples,), giving the + log likelihood ratio of the positive class. 
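As the docstring above states, the two-class decision function is the log-likelihood ratio of the positive class, and `predict_proba` (defined just below) is its logistic transform via `expit`. A quick check on the toy data from the class docstring:

```python
import numpy as np
from scipy.special import expit
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = LinearDiscriminantAnalysis().fit(X, y)

scores = clf.decision_function(X)   # shape (n_samples,) in the binary case
proba_pos = expit(scores)           # logistic transform of the log-ratio
assert np.allclose(proba_pos, clf.predict_proba(X)[:, 1])
```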
+ """ + # Only override for the doc + return super().decision_function(X) + + +class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator): + """Quadratic Discriminant Analysis. + + A classifier with a quadratic decision boundary, generated + by fitting class conditional densities to the data + and using Bayes' rule. + + The model fits a Gaussian density to each class. + + .. versionadded:: 0.17 + *QuadraticDiscriminantAnalysis* + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + priors : ndarray of shape (n_classes,), default=None + Class priors. By default, the class proportions are inferred from the + training data. + + reg_param : float, default=0.0 + Regularizes the per-class covariance estimates by transforming S2 as + ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``, + where S2 corresponds to the `scaling_` attribute of a given class. + + store_covariance : bool, default=False + If True, the class covariance matrices are explicitly computed and + stored in the `self.covariance_` attribute. + + .. versionadded:: 0.17 + + tol : float, default=1.0e-4 + Absolute threshold for a singular value to be considered significant, + used to estimate the rank of `Xk` where `Xk` is the centered matrix + of samples in class k. This parameter does not affect the + predictions. It only controls a warning that is raised when features + are considered to be colinear. + + .. versionadded:: 0.17 + + Attributes + ---------- + covariance_ : list of len n_classes of ndarray \ + of shape (n_features, n_features) + For each class, gives the covariance matrix estimated using the + samples of that class. The estimations are unbiased. Only present if + `store_covariance` is True. + + means_ : array-like of shape (n_classes, n_features) + Class-wise means. + + priors_ : array-like of shape (n_classes,) + Class priors (sum to 1). + + rotations_ : list of len n_classes of ndarray of shape (n_features, n_k) + For each class k an array of shape (n_features, n_k), where + ``n_k = min(n_features, number of elements in class k)`` + It is the rotation of the Gaussian distribution, i.e. its + principal axis. It corresponds to `V`, the matrix of eigenvectors + coming from the SVD of `Xk = U S Vt` where `Xk` is the centered + matrix of samples from class k. + + scalings_ : list of len n_classes of ndarray of shape (n_k,) + For each class, contains the scaling of + the Gaussian distributions along its principal axes, i.e. the + variance in the rotated coordinate system. It corresponds to `S^2 / + (n_samples - 1)`, where `S` is the diagonal matrix of singular values + from the SVD of `Xk`, where `Xk` is the centered matrix of samples + from class k. + + classes_ : ndarray of shape (n_classes,) + Unique class labels. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + See Also + -------- + LinearDiscriminantAnalysis : Linear Discriminant Analysis. 
+ + Examples + -------- + >>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis + >>> import numpy as np + >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) + >>> y = np.array([1, 1, 1, 2, 2, 2]) + >>> clf = QuadraticDiscriminantAnalysis() + >>> clf.fit(X, y) + QuadraticDiscriminantAnalysis() + >>> print(clf.predict([[-0.8, -1]])) + [1] + """ + + def __init__( + self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4 + ): + self.priors = np.asarray(priors) if priors is not None else None + self.reg_param = reg_param + self.store_covariance = store_covariance + self.tol = tol + + def fit(self, X, y): + """Fit the model according to the given training data and parameters. + + .. versionchanged:: 0.19 + ``store_covariances`` has been moved to main constructor as + ``store_covariance`` + + .. versionchanged:: 0.19 + ``tol`` has been moved to main constructor. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training vector, where `n_samples` is the number of samples and + `n_features` is the number of features. + + y : array-like of shape (n_samples,) + Target values (integers). + + Returns + ------- + self : object + Fitted estimator. + """ + X, y = self._validate_data(X, y) + check_classification_targets(y) + self.classes_, y = np.unique(y, return_inverse=True) + n_samples, n_features = X.shape + n_classes = len(self.classes_) + if n_classes < 2: + raise ValueError( + "The number of classes has to be greater than one; got %d class" + % (n_classes) + ) + if self.priors is None: + self.priors_ = np.bincount(y) / float(n_samples) + else: + self.priors_ = self.priors + + cov = None + store_covariance = self.store_covariance + if store_covariance: + cov = [] + means = [] + scalings = [] + rotations = [] + for ind in range(n_classes): + Xg = X[y == ind, :] + meang = Xg.mean(0) + means.append(meang) + if len(Xg) == 1: + raise ValueError( + "y has only 1 sample in class %s, covariance is ill defined." + % str(self.classes_[ind]) + ) + Xgc = Xg - meang + # Xgc = U * S * V.T + _, S, Vt = np.linalg.svd(Xgc, full_matrices=False) + rank = np.sum(S > self.tol) + if rank < n_features: + warnings.warn("Variables are collinear") + S2 = (S**2) / (len(Xg) - 1) + S2 = ((1 - self.reg_param) * S2) + self.reg_param + if self.store_covariance or store_covariance: + # cov = V * (S^2 / (n-1)) * V.T + cov.append(np.dot(S2 * Vt.T, Vt)) + scalings.append(S2) + rotations.append(Vt.T) + if self.store_covariance or store_covariance: + self.covariance_ = cov + self.means_ = np.asarray(means) + self.scalings_ = scalings + self.rotations_ = rotations + return self + + def _decision_function(self, X): + # return log posterior, see eq (4.12) p. 110 of the ESL. + check_is_fitted(self) + + X = self._validate_data(X, reset=False) + norm2 = [] + for i in range(len(self.classes_)): + R = self.rotations_[i] + S = self.scalings_[i] + Xm = X - self.means_[i] + X2 = np.dot(Xm, R * (S ** (-0.5))) + norm2.append(np.sum(X2**2, axis=1)) + norm2 = np.array(norm2).T # shape = [len(X), n_classes] + u = np.asarray([np.sum(np.log(s)) for s in self.scalings_]) + return -0.5 * (norm2 + u) + np.log(self.priors_) + + def decision_function(self, X): + """Apply decision function to an array of samples. + + The decision function is equal (up to a constant factor) to the + log-posterior of the model, i.e. `log p(y = k | x)`. In a binary + classification setting this instead corresponds to the difference + `log p(y = 1 | x) - log p(y = 0 | x)`. 
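`_decision_function` above whitens each centered sample with `R * S**-0.5`, so the squared norm of the whitened coordinates equals the Mahalanobis distance under that class covariance. A sketch of the identity, using an eigendecomposition in place of the fitted `rotations_`/`scalings_` (an analogue assumed purely for illustration):

```python
import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(3, 3)
cov = A @ A.T + np.eye(3)       # a positive-definite covariance
x = rng.randn(4, 3)             # centered samples (mean assumed zero)

# eigendecomposition playing the role of rotations_ (R) and scalings_ (S)
S, R = np.linalg.eigh(cov)

X2 = x @ (R * S ** -0.5)        # whitened coordinates, as in the solver
maha = np.sum(X2 ** 2, axis=1)

# the squared norm in whitened coordinates is the Mahalanobis distance
expected = np.einsum("ij,jk,ik->i", x, np.linalg.inv(cov), x)
assert np.allclose(maha, expected)
```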
See :ref:`lda_qda_math`. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Array of samples (test vectors). + + Returns + ------- + C : ndarray of shape (n_samples,) or (n_samples, n_classes) + Decision function values related to each class, per sample. + In the two-class case, the shape is (n_samples,), giving the + log likelihood ratio of the positive class. + """ + dec_func = self._decision_function(X) + # handle special case of two classes + if len(self.classes_) == 2: + return dec_func[:, 1] - dec_func[:, 0] + return dec_func + + def predict(self, X): + """Perform classification on an array of test vectors X. + + The predicted class C for each sample in X is returned. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Vector to be scored, where `n_samples` is the number of samples and + `n_features` is the number of features. + + Returns + ------- + C : ndarray of shape (n_samples,) + Estimated probabilities. + """ + d = self._decision_function(X) + y_pred = self.classes_.take(d.argmax(1)) + return y_pred + + def predict_proba(self, X): + """Return posterior probabilities of classification. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Array of samples/test vectors. + + Returns + ------- + C : ndarray of shape (n_samples, n_classes) + Posterior probabilities of classification per class. + """ + values = self._decision_function(X) + # compute the likelihood of the underlying gaussian models + # up to a multiplicative constant. + likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis]) + # compute posterior probabilities + return likelihood / likelihood.sum(axis=1)[:, np.newaxis] + + def predict_log_proba(self, X): + """Return log of posterior probabilities of classification. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Array of samples/test vectors. + + Returns + ------- + C : ndarray of shape (n_samples, n_classes) + Posterior log-probabilities of classification per class. + """ + # XXX : can do better to avoid precision overflows + probas_ = self.predict_proba(X) + return np.log(probas_) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/multioutput.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/multioutput.py new file mode 100644 index 0000000000000000000000000000000000000000..095b700e3e5f779b5ccf91b44325f696c671abd0 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/sklearn/multioutput.py @@ -0,0 +1,959 @@ +""" +This module implements multioutput regression and classification. + +The estimators provided in this module are meta-estimators: they require +a base estimator to be provided in their constructor. The meta-estimator +extends single output estimators to multioutput estimators. 
+""" + +# Author: Tim Head +# Author: Hugo Bowne-Anderson +# Author: Chris Rivera +# Author: Michael Williamson +# Author: James Ashton Nichols +# +# License: BSD 3 clause + +import numpy as np +import scipy.sparse as sp +from joblib import Parallel + +from abc import ABCMeta, abstractmethod +from .base import BaseEstimator, clone, MetaEstimatorMixin +from .base import RegressorMixin, ClassifierMixin, is_classifier +from .model_selection import cross_val_predict +from .utils.metaestimators import available_if +from .utils import check_random_state +from .utils.validation import check_is_fitted, has_fit_parameter, _check_fit_params +from .utils.multiclass import check_classification_targets +from .utils.fixes import delayed + +__all__ = [ + "MultiOutputRegressor", + "MultiOutputClassifier", + "ClassifierChain", + "RegressorChain", +] + + +def _fit_estimator(estimator, X, y, sample_weight=None, **fit_params): + estimator = clone(estimator) + if sample_weight is not None: + estimator.fit(X, y, sample_weight=sample_weight, **fit_params) + else: + estimator.fit(X, y, **fit_params) + return estimator + + +def _partial_fit_estimator( + estimator, X, y, classes=None, sample_weight=None, first_time=True +): + if first_time: + estimator = clone(estimator) + + if sample_weight is not None: + if classes is not None: + estimator.partial_fit(X, y, classes=classes, sample_weight=sample_weight) + else: + estimator.partial_fit(X, y, sample_weight=sample_weight) + else: + if classes is not None: + estimator.partial_fit(X, y, classes=classes) + else: + estimator.partial_fit(X, y) + return estimator + + +def _available_if_estimator_has(attr): + """Return a function to check if `estimator` or `estimators_` has `attr`. + + Helper for Chain implementations. + """ + + def _check(self): + return hasattr(self.estimator, attr) or all( + hasattr(est, attr) for est in self.estimators_ + ) + + return available_if(_check) + + +class _MultiOutputEstimator(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta): + @abstractmethod + def __init__(self, estimator, *, n_jobs=None): + self.estimator = estimator + self.n_jobs = n_jobs + + @_available_if_estimator_has("partial_fit") + def partial_fit(self, X, y, classes=None, sample_weight=None): + """Incrementally fit a separate model for each class output. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + y : {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets. + + classes : list of ndarray of shape (n_outputs,), default=None + Each array is unique classes for one output in str/int. + Can be obtained via + ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where `y` + is the target matrix of the entire dataset. + This argument is required for the first call to partial_fit + and can be omitted in the subsequent calls. + Note that `y` doesn't need to contain all labels in `classes`. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. + + Returns + ------- + self : object + Returns a fitted instance. + """ + first_time = not hasattr(self, "estimators_") + y = self._validate_data(X="no_validation", y=y, multi_output=True) + + if y.ndim == 1: + raise ValueError( + "y must have at least two dimensions for " + "multi-output regression but has only one." 
+ ) + + if sample_weight is not None and not has_fit_parameter( + self.estimator, "sample_weight" + ): + raise ValueError("Underlying estimator does not support sample weights.") + + first_time = not hasattr(self, "estimators_") + + self.estimators_ = Parallel(n_jobs=self.n_jobs)( + delayed(_partial_fit_estimator)( + self.estimators_[i] if not first_time else self.estimator, + X, + y[:, i], + classes[i] if classes is not None else None, + sample_weight, + first_time, + ) + for i in range(y.shape[1]) + ) + + if first_time and hasattr(self.estimators_[0], "n_features_in_"): + self.n_features_in_ = self.estimators_[0].n_features_in_ + if first_time and hasattr(self.estimators_[0], "feature_names_in_"): + self.feature_names_in_ = self.estimators_[0].feature_names_in_ + + return self + + def fit(self, X, y, sample_weight=None, **fit_params): + """Fit the model to data, separately for each output variable. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + y : {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets. An indicator matrix turns on multilabel + estimation. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. + + **fit_params : dict of string -> object + Parameters passed to the ``estimator.fit`` method of each step. + + .. versionadded:: 0.23 + + Returns + ------- + self : object + Returns a fitted instance. + """ + + if not hasattr(self.estimator, "fit"): + raise ValueError("The base estimator should implement a fit method") + + y = self._validate_data(X="no_validation", y=y, multi_output=True) + + if is_classifier(self): + check_classification_targets(y) + + if y.ndim == 1: + raise ValueError( + "y must have at least two dimensions for " + "multi-output regression but has only one." + ) + + if sample_weight is not None and not has_fit_parameter( + self.estimator, "sample_weight" + ): + raise ValueError("Underlying estimator does not support sample weights.") + + fit_params_validated = _check_fit_params(X, fit_params) + + self.estimators_ = Parallel(n_jobs=self.n_jobs)( + delayed(_fit_estimator)( + self.estimator, X, y[:, i], sample_weight, **fit_params_validated + ) + for i in range(y.shape[1]) + ) + + if hasattr(self.estimators_[0], "n_features_in_"): + self.n_features_in_ = self.estimators_[0].n_features_in_ + if hasattr(self.estimators_[0], "feature_names_in_"): + self.feature_names_in_ = self.estimators_[0].feature_names_in_ + + return self + + def predict(self, X): + """Predict multi-output variable using model for each target variable. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Returns + ------- + y : {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets predicted across multiple predictors. + Note: Separate models are generated for each predictor. + """ + check_is_fitted(self) + if not hasattr(self.estimators_[0], "predict"): + raise ValueError("The base estimator should implement a predict method") + + y = Parallel(n_jobs=self.n_jobs)( + delayed(e.predict)(X) for e in self.estimators_ + ) + + return np.asarray(y).T + + def _more_tags(self): + return {"multioutput_only": True} + + +class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator): + """Multi target regression. 
+ + This strategy consists of fitting one regressor per target. This is a + simple strategy for extending regressors that do not natively support + multi-target regression. + + .. versionadded:: 0.18 + + Parameters + ---------- + estimator : estimator object + An estimator object implementing :term:`fit` and :term:`predict`. + + n_jobs : int or None, optional (default=None) + The number of jobs to run in parallel. + :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported + by the passed estimator) will be parallelized for each target. + + When individual estimators are fast to train or predict, + using ``n_jobs > 1`` can result in slower performance due + to the parallelism overhead. + + ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all available processes / threads. + See :term:`Glossary ` for more details. + + .. versionchanged:: 0.20 + `n_jobs` default changed from `1` to `None`. + + Attributes + ---------- + estimators_ : list of ``n_output`` estimators + Estimators used for predictions. + + n_features_in_ : int + Number of features seen during :term:`fit`. Only defined if the + underlying `estimator` exposes such an attribute when fit. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimators expose such an attribute when fit. + + .. versionadded:: 1.0 + + See Also + -------- + RegressorChain : A multi-label model that arranges regressions into a + chain. + MultiOutputClassifier : Classifies each output independently rather than + chaining. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.datasets import load_linnerud + >>> from sklearn.multioutput import MultiOutputRegressor + >>> from sklearn.linear_model import Ridge + >>> X, y = load_linnerud(return_X_y=True) + >>> regr = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y) + >>> regr.predict(X[[0]]) + array([[176..., 35..., 57...]]) + """ + + def __init__(self, estimator, *, n_jobs=None): + super().__init__(estimator, n_jobs=n_jobs) + + @_available_if_estimator_has("partial_fit") + def partial_fit(self, X, y, sample_weight=None): + """Incrementally fit the model to data, for each output variable. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + y : {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. + + Returns + ------- + self : object + Returns a fitted instance. + """ + super().partial_fit(X, y, sample_weight=sample_weight) + + +class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator): + """Multi target classification. + + This strategy consists of fitting one classifier per target. This is a + simple strategy for extending classifiers that do not natively support + multi-target classification. + + Parameters + ---------- + estimator : estimator object + An estimator object implementing :term:`fit`, :term:`score` and + :term:`predict_proba`. + + n_jobs : int or None, optional (default=None) + The number of jobs to run in parallel. + :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported + by the passed estimator) will be parallelized for each target. 
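`partial_fit` above clones the base estimator once, on the first call, and thereafter updates one clone per target column. A usage sketch with `SGDRegressor` (which supports `partial_fit`) on hypothetical mini-batches; the method is called for its side effect rather than chained, since this version does not return `self`:

```python
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
Y = X @ rng.randn(4, 2)               # two linear targets

model = MultiOutputRegressor(SGDRegressor(random_state=0))
for start in range(0, 100, 20):       # feed the data in mini-batches
    batch = slice(start, start + 20)
    model.partial_fit(X[batch], Y[batch])

print(len(model.estimators_))         # one fitted SGDRegressor per target
print(model.predict(X[:2]).shape)     # (2, 2)
```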
+ + When individual estimators are fast to train or predict, + using ``n_jobs > 1`` can result in slower performance due + to the parallelism overhead. + + ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all available processes / threads. + See :term:`Glossary ` for more details. + + .. versionchanged:: 0.20 + `n_jobs` default changed from `1` to `None`. + + Attributes + ---------- + classes_ : ndarray of shape (n_classes,) + Class labels. + + estimators_ : list of ``n_output`` estimators + Estimators used for predictions. + + n_features_in_ : int + Number of features seen during :term:`fit`. Only defined if the + underlying `estimator` exposes such an attribute when fit. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimators expose such an attribute when fit. + + .. versionadded:: 1.0 + + See Also + -------- + ClassifierChain : A multi-label model that arranges binary classifiers + into a chain. + MultiOutputRegressor : Fits one regressor per target variable. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.datasets import make_multilabel_classification + >>> from sklearn.multioutput import MultiOutputClassifier + >>> from sklearn.linear_model import LogisticRegression + >>> X, y = make_multilabel_classification(n_classes=3, random_state=0) + >>> clf = MultiOutputClassifier(LogisticRegression()).fit(X, y) + >>> clf.predict(X[-2:]) + array([[1, 1, 1], + [1, 0, 1]]) + """ + + def __init__(self, estimator, *, n_jobs=None): + super().__init__(estimator, n_jobs=n_jobs) + + def fit(self, X, Y, sample_weight=None, **fit_params): + """Fit the model to data matrix X and targets Y. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Y : array-like of shape (n_samples, n_classes) + The target values. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying classifier supports sample + weights. + + **fit_params : dict of string -> object + Parameters passed to the ``estimator.fit`` method of each step. + + .. versionadded:: 0.23 + + Returns + ------- + self : object + Returns a fitted instance. + """ + super().fit(X, Y, sample_weight, **fit_params) + self.classes_ = [estimator.classes_ for estimator in self.estimators_] + return self + + def _check_predict_proba(self): + if hasattr(self, "estimators_"): + # raise an AttributeError if `predict_proba` does not exist for + # each estimator + [getattr(est, "predict_proba") for est in self.estimators_] + return True + # raise an AttributeError if `predict_proba` does not exist for the + # unfitted estimator + getattr(self.estimator, "predict_proba") + return True + + @available_if(_check_predict_proba) + def predict_proba(self, X): + """Return prediction probabilities for each class of each output. + + This method will raise a ``ValueError`` if any of the + estimators do not have ``predict_proba``. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The input data. + + Returns + ------- + p : array of shape (n_samples, n_classes), or a list of n_outputs \ + such arrays if n_outputs > 1. + The class probabilities of the input samples. The order of the + classes corresponds to that in the attribute :term:`classes_`. + + .. 
versionchanged:: 0.19 + This function now returns a list of arrays where the length of + the list is ``n_outputs``, and each array is (``n_samples``, + ``n_classes``) for that particular output. + """ + check_is_fitted(self) + results = [estimator.predict_proba(X) for estimator in self.estimators_] + return results + + def score(self, X, y): + """Return the mean accuracy on the given test data and labels. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Test samples. + + y : array-like of shape (n_samples, n_outputs) + True values for X. + + Returns + ------- + scores : float + Mean accuracy of predicted target versus true target. + """ + check_is_fitted(self) + n_outputs_ = len(self.estimators_) + if y.ndim == 1: + raise ValueError( + "y must have at least two dimensions for " + "multi target classification but has only one" + ) + if y.shape[1] != n_outputs_: + raise ValueError( + "The number of outputs of Y for fit {0} and" + " score {1} should be same".format(n_outputs_, y.shape[1]) + ) + y_pred = self.predict(X) + return np.mean(np.all(y == y_pred, axis=1)) + + def _more_tags(self): + # FIXME + return {"_skip_test": True} + + +def _available_if_base_estimator_has(attr): + """Return a function to check if `base_estimator` or `estimators_` has `attr`. + + Helper for Chain implementations. + """ + + def _check(self): + return hasattr(self.base_estimator, attr) or all( + hasattr(est, attr) for est in self.estimators_ + ) + + return available_if(_check) + + +class _BaseChain(BaseEstimator, metaclass=ABCMeta): + def __init__(self, base_estimator, *, order=None, cv=None, random_state=None): + self.base_estimator = base_estimator + self.order = order + self.cv = cv + self.random_state = random_state + + @abstractmethod + def fit(self, X, Y, **fit_params): + """Fit the model to data matrix X and targets Y. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Y : array-like of shape (n_samples, n_classes) + The target values. + + **fit_params : dict of string -> object + Parameters passed to the `fit` method of each step. + + .. versionadded:: 0.23 + + Returns + ------- + self : object + Returns a fitted instance. 
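The `fit` defined below grows the design matrix one column per link: link *k* trains on `[X, Y[:, order_[:k]]]`, with the true labels appended when `cv` is None. A tiny sketch of that augmentation on hypothetical data:

```python
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(5, 3)
Y = rng.randint(0, 2, size=(5, 2))
order = [1, 0]                   # chain predicts column 1 first, then 0

Y_chain = Y[:, order]
X_aug = np.hstack((X, Y_chain))  # true labels appended when cv is None

# link 0 sees only the original features; link 1 also sees the first target
for chain_idx in range(Y.shape[1]):
    X_link = X_aug[:, : X.shape[1] + chain_idx]
    print("link", chain_idx, "trains on", X_link.shape[1], "features")
```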
+ """ + X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True) + + random_state = check_random_state(self.random_state) + self.order_ = self.order + if isinstance(self.order_, tuple): + self.order_ = np.array(self.order_) + + if self.order_ is None: + self.order_ = np.array(range(Y.shape[1])) + elif isinstance(self.order_, str): + if self.order_ == "random": + self.order_ = random_state.permutation(Y.shape[1]) + elif sorted(self.order_) != list(range(Y.shape[1])): + raise ValueError("invalid order") + + self.estimators_ = [clone(self.base_estimator) for _ in range(Y.shape[1])] + + if self.cv is None: + Y_pred_chain = Y[:, self.order_] + if sp.issparse(X): + X_aug = sp.hstack((X, Y_pred_chain), format="lil") + X_aug = X_aug.tocsr() + else: + X_aug = np.hstack((X, Y_pred_chain)) + + elif sp.issparse(X): + Y_pred_chain = sp.lil_matrix((X.shape[0], Y.shape[1])) + X_aug = sp.hstack((X, Y_pred_chain), format="lil") + + else: + Y_pred_chain = np.zeros((X.shape[0], Y.shape[1])) + X_aug = np.hstack((X, Y_pred_chain)) + + del Y_pred_chain + + for chain_idx, estimator in enumerate(self.estimators_): + y = Y[:, self.order_[chain_idx]] + estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params) + if self.cv is not None and chain_idx < len(self.estimators_) - 1: + col_idx = X.shape[1] + chain_idx + cv_result = cross_val_predict( + self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv + ) + if sp.issparse(X_aug): + X_aug[:, col_idx] = np.expand_dims(cv_result, 1) + else: + X_aug[:, col_idx] = cv_result + + return self + + def predict(self, X): + """Predict on the data matrix X using the ClassifierChain model. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Returns + ------- + Y_pred : array-like of shape (n_samples, n_classes) + The predicted values. + """ + check_is_fitted(self) + X = self._validate_data(X, accept_sparse=True, reset=False) + Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) + for chain_idx, estimator in enumerate(self.estimators_): + previous_predictions = Y_pred_chain[:, :chain_idx] + if sp.issparse(X): + if chain_idx == 0: + X_aug = X + else: + X_aug = sp.hstack((X, previous_predictions)) + else: + X_aug = np.hstack((X, previous_predictions)) + Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) + + inv_order = np.empty_like(self.order_) + inv_order[self.order_] = np.arange(len(self.order_)) + Y_pred = Y_pred_chain[:, inv_order] + + return Y_pred + + +class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): + """A multi-label model that arranges binary classifiers into a chain. + + Each model makes a prediction in the order specified by the chain using + all of the available features provided to the model plus the predictions + of models that are earlier in the chain. + + Read more in the :ref:`User Guide `. + + .. versionadded:: 0.19 + + Parameters + ---------- + base_estimator : estimator + The base estimator from which the classifier chain is built. + + order : array-like of shape (n_outputs,) or 'random', default=None + If `None`, the order will be determined by the order of columns in + the label matrix Y.:: + + order = [0, 1, 2, ..., Y.shape[1] - 1] + + The order of the chain can be explicitly set by providing a list of + integers. For example, for a chain of length 5.:: + + order = [1, 3, 2, 4, 0] + + means that the first model in the chain will make predictions for + column 1 in the Y matrix, the second model will make predictions + for column 3, etc. 
+ + If order is `random` a random ordering will be used. + + cv : int, cross-validation generator or an iterable, default=None + Determines whether to use cross validated predictions or true + labels for the results of previous estimators in the chain. + Possible inputs for cv are: + + - None, to use true labels when fitting, + - integer, to specify the number of folds in a (Stratified)KFold, + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. + + random_state : int, RandomState instance or None, optional (default=None) + If ``order='random'``, determines random number generation for the + chain order. + In addition, it controls the random seed given at each `base_estimator` + at each chaining iteration. Thus, it is only used when `base_estimator` + exposes a `random_state`. + Pass an int for reproducible output across multiple function calls. + See :term:`Glossary `. + + Attributes + ---------- + classes_ : list + A list of arrays of length ``len(estimators_)`` containing the + class labels for each estimator in the chain. + + estimators_ : list + A list of clones of base_estimator. + + order_ : list + The order of labels in the classifier chain. + + n_features_in_ : int + Number of features seen during :term:`fit`. Only defined if the + underlying `base_estimator` exposes such an attribute when fit. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + See Also + -------- + RegressorChain : Equivalent for regression. + MultioutputClassifier : Classifies each output independently rather than + chaining. + + References + ---------- + Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, "Classifier + Chains for Multi-label Classification", 2009. + + Examples + -------- + >>> from sklearn.datasets import make_multilabel_classification + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.model_selection import train_test_split + >>> from sklearn.multioutput import ClassifierChain + >>> X, Y = make_multilabel_classification( + ... n_samples=12, n_classes=3, random_state=0 + ... ) + >>> X_train, X_test, Y_train, Y_test = train_test_split( + ... X, Y, random_state=0 + ... ) + >>> base_lr = LogisticRegression(solver='lbfgs', random_state=0) + >>> chain = ClassifierChain(base_lr, order='random', random_state=0) + >>> chain.fit(X_train, Y_train).predict(X_test) + array([[1., 1., 0.], + [1., 0., 0.], + [0., 1., 0.]]) + >>> chain.predict_proba(X_test) + array([[0.8387..., 0.9431..., 0.4576...], + [0.8878..., 0.3684..., 0.2640...], + [0.0321..., 0.9935..., 0.0625...]]) + """ + + def fit(self, X, Y): + """Fit the model to data matrix X and targets Y. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Y : array-like of shape (n_samples, n_classes) + The target values. + + Returns + ------- + self : object + Class instance. + """ + super().fit(X, Y) + self.classes_ = [ + estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_) + ] + return self + + @_available_if_base_estimator_has("predict_proba") + def predict_proba(self, X): + """Predict probability estimates. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Returns + ------- + Y_prob : array-like of shape (n_samples, n_classes) + The predicted probabilities. 
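With `cv` set, each downstream link is trained on cross-validated predictions of the earlier links instead of their true labels, which better matches what the link will actually receive at predict time. A usage sketch contrasting the two modes (data sizes chosen arbitrarily):

```python
from sklearn.datasets import make_multilabel_classification
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import ClassifierChain

X, Y = make_multilabel_classification(n_samples=100, n_classes=3, random_state=0)

# cv=None: later links see the true labels of earlier targets while fitting
chain_true = ClassifierChain(LogisticRegression(), order="random", random_state=0)
# cv=3: later links see 3-fold cross-validated predictions instead
chain_cv = ClassifierChain(LogisticRegression(), cv=3, order="random", random_state=0)

for chain in (chain_true, chain_cv):
    chain.fit(X, Y)
    print(chain.predict(X[:2]))
```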
+ """ + X = self._validate_data(X, accept_sparse=True, reset=False) + Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_))) + Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) + for chain_idx, estimator in enumerate(self.estimators_): + previous_predictions = Y_pred_chain[:, :chain_idx] + if sp.issparse(X): + X_aug = sp.hstack((X, previous_predictions)) + else: + X_aug = np.hstack((X, previous_predictions)) + Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1] + Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) + inv_order = np.empty_like(self.order_) + inv_order[self.order_] = np.arange(len(self.order_)) + Y_prob = Y_prob_chain[:, inv_order] + + return Y_prob + + @_available_if_base_estimator_has("decision_function") + def decision_function(self, X): + """Evaluate the decision_function of the models in the chain. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The input data. + + Returns + ------- + Y_decision : array-like of shape (n_samples, n_classes) + Returns the decision function of the sample for each model + in the chain. + """ + X = self._validate_data(X, accept_sparse=True, reset=False) + Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_))) + Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) + for chain_idx, estimator in enumerate(self.estimators_): + previous_predictions = Y_pred_chain[:, :chain_idx] + if sp.issparse(X): + X_aug = sp.hstack((X, previous_predictions)) + else: + X_aug = np.hstack((X, previous_predictions)) + Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug) + Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) + + inv_order = np.empty_like(self.order_) + inv_order[self.order_] = np.arange(len(self.order_)) + Y_decision = Y_decision_chain[:, inv_order] + + return Y_decision + + def _more_tags(self): + return {"_skip_test": True, "multioutput_only": True} + + +class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain): + """A multi-label model that arranges regressions into a chain. + + Each model makes a prediction in the order specified by the chain using + all of the available features provided to the model plus the predictions + of models that are earlier in the chain. + + Read more in the :ref:`User Guide `. + + .. versionadded:: 0.20 + + Parameters + ---------- + base_estimator : estimator + The base estimator from which the regressor chain is built. + + order : array-like of shape (n_outputs,) or 'random', default=None + If `None`, the order will be determined by the order of columns in + the label matrix Y.:: + + order = [0, 1, 2, ..., Y.shape[1] - 1] + + The order of the chain can be explicitly set by providing a list of + integers. For example, for a chain of length 5.:: + + order = [1, 3, 2, 4, 0] + + means that the first model in the chain will make predictions for + column 1 in the Y matrix, the second model will make predictions + for column 3, etc. + + If order is 'random' a random ordering will be used. + + cv : int, cross-validation generator or an iterable, default=None + Determines whether to use cross validated predictions or true + labels for the results of previous estimators in the chain. + Possible inputs for cv are: + + - None, to use true labels when fitting, + - integer, to specify the number of folds in a (Stratified)KFold, + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. 
+ + random_state : int, RandomState instance or None, optional (default=None) + If ``order='random'``, determines random number generation for the + chain order. + In addition, it controls the random seed given at each `base_estimator` + at each chaining iteration. Thus, it is only used when `base_estimator` + exposes a `random_state`. + Pass an int for reproducible output across multiple function calls. + See :term:`Glossary `. + + Attributes + ---------- + estimators_ : list + A list of clones of base_estimator. + + order_ : list + The order of labels in the classifier chain. + + n_features_in_ : int + Number of features seen during :term:`fit`. Only defined if the + underlying `base_estimator` exposes such an attribute when fit. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + See Also + -------- + ClassifierChain : Equivalent for classification. + MultiOutputRegressor : Learns each output independently rather than + chaining. + + Examples + -------- + >>> from sklearn.multioutput import RegressorChain + >>> from sklearn.linear_model import LogisticRegression + >>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial') + >>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]] + >>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y) + >>> chain.predict(X) + array([[0., 2.], + [1., 1.], + [2., 0.]]) + """ + + def fit(self, X, Y, **fit_params): + """Fit the model to data matrix X and targets Y. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Y : array-like of shape (n_samples, n_classes) + The target values. + + **fit_params : dict of string -> object + Parameters passed to the `fit` method at each step + of the regressor chain. + + .. versionadded:: 0.23 + + Returns + ------- + self : object + Returns a fitted instance. + """ + super().fit(X, Y, **fit_params) + return self + + def _more_tags(self): + return {"multioutput_only": True} diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fe86b59d782bdb09d70aa44f80370be95a667c83 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/__init__.py @@ -0,0 +1,85 @@ +""" +Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more +""" +from __future__ import absolute_import + +# Set default logging handler to avoid "No handler found" warnings. +import logging +import warnings +from logging import NullHandler + +from . 
import exceptions +from ._version import __version__ +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url +from .filepost import encode_multipart_formdata +from .poolmanager import PoolManager, ProxyManager, proxy_from_url +from .response import HTTPResponse +from .util.request import make_headers +from .util.retry import Retry +from .util.timeout import Timeout +from .util.url import get_host + +__author__ = "Andrey Petrov (andrey.petrov@shazow.net)" +__license__ = "MIT" +__version__ = __version__ + +__all__ = ( + "HTTPConnectionPool", + "HTTPSConnectionPool", + "PoolManager", + "ProxyManager", + "HTTPResponse", + "Retry", + "Timeout", + "add_stderr_logger", + "connection_from_url", + "disable_warnings", + "encode_multipart_formdata", + "get_host", + "make_headers", + "proxy_from_url", +) + +logging.getLogger(__name__).addHandler(NullHandler()) + + +def add_stderr_logger(level=logging.DEBUG): + """ + Helper for quickly adding a StreamHandler to the logger. Useful for + debugging. + + Returns the handler after adding it. + """ + # This method needs to be in this __init__.py to get the __name__ correct + # even if urllib3 is vendored within another package. + logger = logging.getLogger(__name__) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s")) + logger.addHandler(handler) + logger.setLevel(level) + logger.debug("Added a stderr logging handler to logger: %s", __name__) + return handler + + +# ... Clean up. +del NullHandler + + +# All warning filters *must* be appended unless you're really certain that they +# shouldn't be: otherwise, it's very hard for users to use most Python +# mechanisms to silence them. +# SecurityWarning's always go off by default. +warnings.simplefilter("always", exceptions.SecurityWarning, append=True) +# SubjectAltNameWarning's should go off once per host +warnings.simplefilter("default", exceptions.SubjectAltNameWarning, append=True) +# InsecurePlatformWarning's don't vary between requests, so we keep it default. +warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True) +# SNIMissingWarnings should go off only once. +warnings.simplefilter("default", exceptions.SNIMissingWarning, append=True) + + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. 
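A short sketch tying the helpers in this `__init__` together: attach a stderr handler, silence a warning category, and issue a pooled request (`example.com` stands in for any real endpoint):

```python
import logging
import urllib3

# Attach an INFO-level stderr handler to the "urllib3" logger.
handler = urllib3.add_stderr_logger(level=logging.INFO)

# Silence every urllib3 warning category (HTTPWarning is the base class).
urllib3.disable_warnings()

# PoolManager re-uses connections per host; `request` returns an HTTPResponse.
http = urllib3.PoolManager()
resp = http.request("GET", "http://example.com/")  # placeholder endpoint
print(resp.status, len(resp.data))
```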
+ """ + warnings.simplefilter("ignore", category) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/_collections.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/_collections.py new file mode 100644 index 0000000000000000000000000000000000000000..da9857e986d89acac3ba05a6735dc08c249bde1a --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/_collections.py @@ -0,0 +1,337 @@ +from __future__ import absolute_import + +try: + from collections.abc import Mapping, MutableMapping +except ImportError: + from collections import Mapping, MutableMapping +try: + from threading import RLock +except ImportError: # Platform-specific: No threads available + + class RLock: + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + pass + + +from collections import OrderedDict + +from .exceptions import InvalidHeader +from .packages import six +from .packages.six import iterkeys, itervalues + +__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"] + + +_Null = object() + + +class RecentlyUsedContainer(MutableMapping): + """ + Provides a thread-safe dict-like container which maintains up to + ``maxsize`` keys while throwing away the least-recently-used keys beyond + ``maxsize``. + + :param maxsize: + Maximum number of recent elements to retain. + + :param dispose_func: + Every time an item is evicted from the container, + ``dispose_func(value)`` is called. Callback which will get called + """ + + ContainerCls = OrderedDict + + def __init__(self, maxsize=10, dispose_func=None): + self._maxsize = maxsize + self.dispose_func = dispose_func + + self._container = self.ContainerCls() + self.lock = RLock() + + def __getitem__(self, key): + # Re-insert the item, moving it to the end of the eviction line. + with self.lock: + item = self._container.pop(key) + self._container[key] = item + return item + + def __setitem__(self, key, value): + evicted_value = _Null + with self.lock: + # Possibly evict the existing value of 'key' + evicted_value = self._container.get(key, _Null) + self._container[key] = value + + # If we didn't evict an existing value, we might have to evict the + # least recently used item from the beginning of the container. + if len(self._container) > self._maxsize: + _key, evicted_value = self._container.popitem(last=False) + + if self.dispose_func and evicted_value is not _Null: + self.dispose_func(evicted_value) + + def __delitem__(self, key): + with self.lock: + value = self._container.pop(key) + + if self.dispose_func: + self.dispose_func(value) + + def __len__(self): + with self.lock: + return len(self._container) + + def __iter__(self): + raise NotImplementedError( + "Iteration over this class is unlikely to be threadsafe." + ) + + def clear(self): + with self.lock: + # Copy pointers to all values, then wipe the mapping + values = list(itervalues(self._container)) + self._container.clear() + + if self.dispose_func: + for value in values: + self.dispose_func(value) + + def keys(self): + with self.lock: + return list(iterkeys(self._container)) + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 7230. 
Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. + + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + """ + + def __init__(self, headers=None, **kwargs): + super(HTTPHeaderDict, self).__init__() + self._container = OrderedDict() + if headers is not None: + if isinstance(headers, HTTPHeaderDict): + self._copy_from(headers) + else: + self.extend(headers) + if kwargs: + self.extend(kwargs) + + def __setitem__(self, key, val): + self._container[key.lower()] = [key, val] + return self._container[key.lower()] + + def __getitem__(self, key): + val = self._container[key.lower()] + return ", ".join(val[1:]) + + def __delitem__(self, key): + del self._container[key.lower()] + + def __contains__(self, key): + return key.lower() in self._container + + def __eq__(self, other): + if not isinstance(other, Mapping) and not hasattr(other, "keys"): + return False + if not isinstance(other, type(self)): + other = type(self)(other) + return dict((k.lower(), v) for k, v in self.itermerged()) == dict( + (k.lower(), v) for k, v in other.itermerged() + ) + + def __ne__(self, other): + return not self.__eq__(other) + + if six.PY2: # Python 2 + iterkeys = MutableMapping.iterkeys + itervalues = MutableMapping.itervalues + + __marker = object() + + def __len__(self): + return len(self._container) + + def __iter__(self): + # Only provide the originally cased names + for vals in self._container.values(): + yield vals[0] + + def pop(self, key, default=__marker): + """D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + """ + # Using the MutableMapping function directly fails due to the private marker. + # Using ordinary dict.pop would expose the internal structures. + # So let's reinvent the wheel. + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def discard(self, key): + try: + del self[key] + except KeyError: + pass + + def add(self, key, val): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. + + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + key_lower = key.lower() + new_vals = [key, val] + # Keep the common case aka no item present as fast as possible + vals = self._container.setdefault(key_lower, new_vals) + if new_vals is not vals: + vals.append(val) + + def extend(self, *args, **kwargs): + """Generic import function for any type of header-like object. 
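+ + An illustrative sketch (any iterable of pairs, mapping, or keyword + arguments works):: + + >>> h = HTTPHeaderDict() + >>> h.extend([('Accept', 'text/html'), ('Accept', 'application/json')]) + >>> h['accept'] + 'text/html, application/json' +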
+ Adapted version of MutableMapping.update in order to insert items + with self.add instead of self.__setitem__ + """ + if len(args) > 1: + raise TypeError( + "extend() takes at most 1 positional " + "arguments ({0} given)".format(len(args)) + ) + other = args[0] if len(args) >= 1 else () + + if isinstance(other, HTTPHeaderDict): + for key, val in other.iteritems(): + self.add(key, val) + elif isinstance(other, Mapping): + for key in other: + self.add(key, other[key]) + elif hasattr(other, "keys"): + for key in other.keys(): + self.add(key, other[key]) + else: + for key, value in other: + self.add(key, value) + + for key, value in kwargs.items(): + self.add(key, value) + + def getlist(self, key, default=__marker): + """Returns a list of all the values for the named field. Returns an + empty list if the key doesn't exist.""" + try: + vals = self._container[key.lower()] + except KeyError: + if default is self.__marker: + return [] + return default + else: + return vals[1:] + + # Backwards compatibility for httplib + getheaders = getlist + getallmatchingheaders = getlist + iget = getlist + + # Backwards compatibility for http.cookiejar + get_all = getlist + + def __repr__(self): + return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) + + def _copy_from(self, other): + for key in other: + val = other.getlist(key) + if isinstance(val, list): + # Don't need to convert tuples + val = list(val) + self._container[key.lower()] = [key] + val + + def copy(self): + clone = type(self)() + clone._copy_from(self) + return clone + + def iteritems(self): + """Iterate over all header lines, including duplicate ones.""" + for key in self: + vals = self._container[key.lower()] + for val in vals[1:]: + yield vals[0], val + + def itermerged(self): + """Iterate over all headers, merging duplicate ones together.""" + for key in self: + val = self._container[key.lower()] + yield val[0], ", ".join(val[1:]) + + def items(self): + return list(self.iteritems()) + + @classmethod + def from_httplib(cls, message): # Python 2 + """Read headers from a Python 2 httplib message object.""" + # python2.7 does not expose a proper API for exporting multiheaders + # efficiently. This function re-reads raw lines from the message + # object and extracts the multiheaders properly. + obs_fold_continued_leaders = (" ", "\t") + headers = [] + + for line in message.headers: + if line.startswith(obs_fold_continued_leaders): + if not headers: + # We received a header line that starts with OWS as described + # in RFC-7230 S3.2.4. This indicates a multiline header, but + # there exists no previous header to which we can attach it. 
+ raise InvalidHeader( + "Header continuation with no previous header: %s" % line + ) + else: + key, value = headers[-1] + headers[-1] = (key, value + " " + line.strip()) + continue + + key, value = line.split(":", 1) + headers.append((key, value.strip())) + + return cls(headers) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/_version.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/_version.py new file mode 100644 index 0000000000000000000000000000000000000000..d905b69755a10be256d98f38f798f622be082c0b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/_version.py @@ -0,0 +1,2 @@ +# This file is protected via CODEOWNERS +__version__ = "1.26.9" diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/connection.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/connection.py new file mode 100644 index 0000000000000000000000000000000000000000..7bf395bdac1127bfab76eef991ba97ecbdabe7a5 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/connection.py @@ -0,0 +1,567 @@ +from __future__ import absolute_import + +import datetime +import logging +import os +import re +import socket +import warnings +from socket import error as SocketError +from socket import timeout as SocketTimeout + +from .packages import six +from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection +from .packages.six.moves.http_client import HTTPException # noqa: F401 +from .util.proxy import create_proxy_ssl_context + +try: # Compiled with SSL? + import ssl + + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None + + class BaseSSLError(BaseException): + pass + + +try: + # Python 3: not a no-op, we're adding this to the namespace so it can be imported. + ConnectionError = ConnectionError +except NameError: + # Python 2 + class ConnectionError(Exception): + pass + + +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + BrokenPipeError = BrokenPipeError +except NameError: # Python 2: + + class BrokenPipeError(Exception): + pass + + +from ._collections import HTTPHeaderDict # noqa (historical, removed in v2) +from ._version import __version__ +from .exceptions import ( + ConnectTimeoutError, + NewConnectionError, + SubjectAltNameWarning, + SystemTimeWarning, +) +from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection +from .util.ssl_ import ( + assert_fingerprint, + create_urllib3_context, + is_ipaddress, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .util.ssl_match_hostname import CertificateError, match_hostname + +log = logging.getLogger(__name__) + +port_by_scheme = {"http": 80, "https": 443} + +# When it comes time to update this value as a part of regular maintenance +# (ie test_recent_date is failing) update it to ~6 months before the current date. +RECENT_DATE = datetime.date(2020, 7, 1) + +_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") + + +class HTTPConnection(_HTTPConnection, object): + """ + Based on :class:`http.client.HTTPConnection` but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. 
+ Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass: + + .. code-block:: python + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). + """ + + default_port = port_by_scheme["http"] + + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + + #: Whether this connection verifies the host's certificate. + is_verified = False + + #: Whether this proxy connection (if used) verifies the proxy host's + #: certificate. + proxy_is_verified = None + + def __init__(self, *args, **kw): + if not six.PY2: + kw.pop("strict", None) + + # Pre-set source_address. + self.source_address = kw.get("source_address") + + #: The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop("socket_options", self.default_socket_options) + + # Proxy options provided by the user. + self.proxy = kw.pop("proxy", None) + self.proxy_config = kw.pop("proxy_config", None) + + _HTTPConnection.__init__(self, *args, **kw) + + @property + def host(self): + """ + Getter method to remove any trailing dots that indicate the hostname is an FQDN. + + In general, SSL certificates don't include the trailing dot indicating a + fully-qualified domain name, and thus, they don't validate properly when + checked against a domain name that includes the dot. In addition, some + servers may not expect to receive the trailing dot when provided. + + However, the hostname with trailing dot is critical to DNS resolution; doing a + lookup with the trailing dot will properly only resolve the appropriate FQDN, + whereas a lookup without a trailing dot will search the system's search domain + list. Thus, it's important to keep the original host around for use only in + those cases where it's appropriate (i.e., when doing DNS lookup to establish the + actual TCP connection across which we're going to send HTTP requests). + """ + return self._dns_host.rstrip(".") + + @host.setter + def host(self, value): + """ + Setter for the `host` property. + + We assume that only urllib3 uses the _dns_host attribute; httplib itself + only uses `host`, and it seems reasonable that other libraries follow suit. + """ + self._dns_host = value + + def _new_conn(self): + """Establish a socket connection and set nodelay settings on it. + + :return: New socket connection. + """ + extra_kw = {} + if self.source_address: + extra_kw["source_address"] = self.source_address + + if self.socket_options: + extra_kw["socket_options"] = self.socket_options + + try: + conn = connection.create_connection( + (self._dns_host, self.port), self.timeout, **extra_kw + ) + + except SocketTimeout: + raise ConnectTimeoutError( + self, + "Connection to %s timed out. 
(connect timeout=%s)" + % (self.host, self.timeout), + ) + + except SocketError as e: + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e + ) + + return conn + + def _is_using_tunnel(self): + # Google App Engine's httplib does not define _tunnel_host + return getattr(self, "_tunnel_host", None) + + def _prepare_conn(self, conn): + self.sock = conn + if self._is_using_tunnel(): + # TODO: Fix tunnel so it doesn't depend on self.sock state. + self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + def putrequest(self, method, url, *args, **kwargs): + """ """ + # Empty docstring because the indentation of CPython's implementation + # is broken but we don't want this method in our documentation. + match = _CONTAINS_CONTROL_CHAR_RE.search(method) + if match: + raise ValueError( + "Method cannot contain non-token characters %r (found at least %r)" + % (method, match.group()) + ) + + return _HTTPConnection.putrequest(self, method, url, *args, **kwargs) + + def putheader(self, header, *values): + """ """ + if not any(isinstance(v, str) and v == SKIP_HEADER for v in values): + _HTTPConnection.putheader(self, header, *values) + elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS: + raise ValueError( + "urllib3.util.SKIP_HEADER only supports '%s'" + % ("', '".join(map(str.title, sorted(SKIPPABLE_HEADERS))),) + ) + + def request(self, method, url, body=None, headers=None): + if headers is None: + headers = {} + else: + # Avoid modifying the headers passed into .request() + headers = headers.copy() + if "user-agent" not in (six.ensure_str(k.lower()) for k in headers): + headers["User-Agent"] = _get_default_user_agent() + super(HTTPConnection, self).request(method, url, body=body, headers=headers) + + def request_chunked(self, method, url, body=None, headers=None): + """ + Alternative to the common request method, which sends the + body with chunked encoding and not as one block + """ + headers = headers or {} + header_keys = set([six.ensure_str(k.lower()) for k in headers]) + skip_accept_encoding = "accept-encoding" in header_keys + skip_host = "host" in header_keys + self.putrequest( + method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host + ) + if "user-agent" not in header_keys: + self.putheader("User-Agent", _get_default_user_agent()) + for header, value in headers.items(): + self.putheader(header, value) + if "transfer-encoding" not in header_keys: + self.putheader("Transfer-Encoding", "chunked") + self.endheaders() + + if body is not None: + stringish_types = six.string_types + (bytes,) + if isinstance(body, stringish_types): + body = (body,) + for chunk in body: + if not chunk: + continue + if not isinstance(chunk, bytes): + chunk = chunk.encode("utf8") + len_str = hex(len(chunk))[2:] + to_send = bytearray(len_str.encode()) + to_send += b"\r\n" + to_send += chunk + to_send += b"\r\n" + self.send(to_send) + + # After the if clause, to always have a closed body + self.send(b"0\r\n\r\n") + + +class HTTPSConnection(HTTPConnection): + """ + Many of the parameters to this constructor are passed to the underlying SSL + socket by means of :py:func:`urllib3.util.ssl_wrap_socket`. 
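+ + A minimal construction sketch (the host is hypothetical; no socket is + opened until :meth:`connect` or a request is made):: + + >>> conn = HTTPSConnection('example.com', port=443, timeout=5)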
+ """ + + default_port = port_by_scheme["https"] + + cert_reqs = None + ca_certs = None + ca_cert_dir = None + ca_cert_data = None + ssl_version = None + assert_fingerprint = None + tls_in_tls_required = False + + def __init__( + self, + host, + port=None, + key_file=None, + cert_file=None, + key_password=None, + strict=None, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + ssl_context=None, + server_hostname=None, + **kw + ): + + HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw) + + self.key_file = key_file + self.cert_file = cert_file + self.key_password = key_password + self.ssl_context = ssl_context + self.server_hostname = server_hostname + + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. (See Issue #356) + self._protocol = "https" + + def set_cert( + self, + key_file=None, + cert_file=None, + cert_reqs=None, + key_password=None, + ca_certs=None, + assert_hostname=None, + assert_fingerprint=None, + ca_cert_dir=None, + ca_cert_data=None, + ): + """ + This method should only be called once, before the connection is used. + """ + # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also + # have an SSLContext object in which case we'll use its verify_mode. + if cert_reqs is None: + if self.ssl_context is not None: + cert_reqs = self.ssl_context.verify_mode + else: + cert_reqs = resolve_cert_reqs(None) + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.key_password = key_password + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + self.ca_cert_data = ca_cert_data + + def connect(self): + # Add certificate verification + self.sock = conn = self._new_conn() + hostname = self.host + tls_in_tls = False + + if self._is_using_tunnel(): + if self.tls_in_tls_required: + self.sock = conn = self._connect_tls_proxy(hostname, conn) + tls_in_tls = True + + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + # Override the host with the one we're requesting data from. + hostname = self._tunnel_host + + server_hostname = hostname + if self.server_hostname is not None: + server_hostname = self.server_hostname + + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn( + ( + "System time is way off (before {0}). This will probably " + "lead to SSL verification errors" + ).format(RECENT_DATE), + SystemTimeWarning, + ) + + # Wrap socket using verification with the root certs in + # trusted_root_certs + default_ssl_context = False + if self.ssl_context is None: + default_ssl_context = True + self.ssl_context = create_urllib3_context( + ssl_version=resolve_ssl_version(self.ssl_version), + cert_reqs=resolve_cert_reqs(self.cert_reqs), + ) + + context = self.ssl_context + context.verify_mode = resolve_cert_reqs(self.cert_reqs) + + # Try to load OS default certs if none are given. 
+ # Works well on Windows (requires Python3.4+) + if ( + not self.ca_certs + and not self.ca_cert_dir + and not self.ca_cert_data + and default_ssl_context + and hasattr(context, "load_default_certs") + ): + context.load_default_certs() + + self.sock = ssl_wrap_socket( + sock=conn, + keyfile=self.key_file, + certfile=self.cert_file, + key_password=self.key_password, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + ca_cert_data=self.ca_cert_data, + server_hostname=server_hostname, + ssl_context=context, + tls_in_tls=tls_in_tls, + ) + + # If we're using all defaults and the connection + # is TLSv1 or TLSv1.1 we throw a DeprecationWarning + # for the host. + if ( + default_ssl_context + and self.ssl_version is None + and hasattr(self.sock, "version") + and self.sock.version() in {"TLSv1", "TLSv1.1"} + ): + warnings.warn( + "Negotiating TLSv1/TLSv1.1 by default is deprecated " + "and will be disabled in urllib3 v2.0.0. Connecting to " + "'%s' with '%s' can be enabled by explicitly opting-in " + "with 'ssl_version'" % (self.host, self.sock.version()), + DeprecationWarning, + ) + + if self.assert_fingerprint: + assert_fingerprint( + self.sock.getpeercert(binary_form=True), self.assert_fingerprint + ) + elif ( + context.verify_mode != ssl.CERT_NONE + and not getattr(context, "check_hostname", False) + and self.assert_hostname is not False + ): + # While urllib3 attempts to always turn off hostname matching from + # the TLS library, this cannot always be done. So we check whether + # the TLS Library still thinks it's matching hostnames. + cert = self.sock.getpeercert() + if not cert.get("subjectAltName", ()): + warnings.warn( + ( + "Certificate for {0} has no `subjectAltName`, falling back to check for a " + "`commonName` for now. This feature is being removed by major browsers and " + "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 " + "for details.)".format(hostname) + ), + SubjectAltNameWarning, + ) + _match_hostname(cert, self.assert_hostname or server_hostname) + + self.is_verified = ( + context.verify_mode == ssl.CERT_REQUIRED + or self.assert_fingerprint is not None + ) + + def _connect_tls_proxy(self, hostname, conn): + """ + Establish a TLS connection to the proxy using the provided SSL context. + """ + proxy_config = self.proxy_config + ssl_context = proxy_config.ssl_context + if ssl_context: + # If the user provided a proxy context, we assume CA and client + # certificates have already been set + return ssl_wrap_socket( + sock=conn, + server_hostname=hostname, + ssl_context=ssl_context, + ) + + ssl_context = create_proxy_ssl_context( + self.ssl_version, + self.cert_reqs, + self.ca_certs, + self.ca_cert_dir, + self.ca_cert_data, + ) + + # If no cert was provided, use only the default options for server + # certificate validation + socket = ssl_wrap_socket( + sock=conn, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + ca_cert_data=self.ca_cert_data, + server_hostname=hostname, + ssl_context=ssl_context, + ) + + if ssl_context.verify_mode != ssl.CERT_NONE and not getattr( + ssl_context, "check_hostname", False + ): + # While urllib3 attempts to always turn off hostname matching from + # the TLS library, this cannot always be done. So we check whether + # the TLS Library still thinks it's matching hostnames. + cert = socket.getpeercert() + if not cert.get("subjectAltName", ()): + warnings.warn( + ( + "Certificate for {0} has no `subjectAltName`, falling back to check for a " + "`commonName` for now. 
This feature is being removed by major browsers and " + "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 " + "for details.)".format(hostname) + ), + SubjectAltNameWarning, + ) + _match_hostname(cert, hostname) + + self.proxy_is_verified = ssl_context.verify_mode == ssl.CERT_REQUIRED + return socket + + +def _match_hostname(cert, asserted_hostname): + # Our upstream implementation of ssl.match_hostname() + # only applies this normalization to IP addresses so it doesn't + # match DNS SANs so we do the same thing! + stripped_hostname = asserted_hostname.strip("u[]") + if is_ipaddress(stripped_hostname): + asserted_hostname = stripped_hostname + + try: + match_hostname(cert, asserted_hostname) + except CertificateError as e: + log.warning( + "Certificate did not match expected hostname: %s. Certificate: %s", + asserted_hostname, + cert, + ) + # Add cert to exception and reraise so client code can inspect + # the cert when catching the exception, if they want to + e._peer_cert = cert + raise + + +def _get_default_user_agent(): + return "python-urllib3/%s" % __version__ + + +class DummyConnection(object): + """Used to detect a failed ConnectionCls import.""" + + pass + + +if not ssl: + HTTPSConnection = DummyConnection # noqa: F811 + + +VerifiedHTTPSConnection = HTTPSConnection diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py new file mode 100644 index 0000000000000000000000000000000000000000..15bffcb23a902baa14f2332fddc83a416cc783b1 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py @@ -0,0 +1,1108 @@ +from __future__ import absolute_import + +import errno +import logging +import re +import socket +import sys +import warnings +from socket import error as SocketError +from socket import timeout as SocketTimeout + +from .connection import ( + BaseSSLError, + BrokenPipeError, + DummyConnection, + HTTPConnection, + HTTPException, + HTTPSConnection, + VerifiedHTTPSConnection, + port_by_scheme, +) +from .exceptions import ( + ClosedPoolError, + EmptyPoolError, + HeaderParsingError, + HostChangedError, + InsecureRequestWarning, + LocationValueError, + MaxRetryError, + NewConnectionError, + ProtocolError, + ProxyError, + ReadTimeoutError, + SSLError, + TimeoutError, +) +from .packages import six +from .packages.six.moves import queue +from .request import RequestMethods +from .response import HTTPResponse +from .util.connection import is_connection_dropped +from .util.proxy import connection_requires_http_tunnel +from .util.queue import LifoQueue +from .util.request import set_file_position +from .util.response import assert_header_parsing +from .util.retry import Retry +from .util.ssl_match_hostname import CertificateError +from .util.timeout import Timeout +from .util.url import Url, _encode_target +from .util.url import _normalize_host as normalize_host +from .util.url import get_host, parse_url + +xrange = six.moves.xrange + +log = logging.getLogger(__name__) + +_Default = object() + + +# Pool objects +class ConnectionPool(object): + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. + + .. note:: + ConnectionPool.urlopen() does not normalize or percent-encode target URIs + which is useful if your target server doesn't support percent-encoded + target URIs. 
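+ + Pools also work as context managers; an illustrative sketch with a + hypothetical host (construction alone opens no connection):: + + >>> with HTTPConnectionPool('example.com', maxsize=2) as pool: + ... pass # close() is called automatically when the block exits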
+ """ + + scheme = None + QueueCls = LifoQueue + + def __init__(self, host, port=None): + if not host: + raise LocationValueError("No host specified.") + + self.host = _normalize_host(host, scheme=self.scheme) + self._proxy_host = host.lower() + self.port = port + + def __str__(self): + return "%s(host=%r, port=%r)" % (type(self).__name__, self.host, self.port) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + pass + + +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = {errno.EAGAIN, errno.EWOULDBLOCK} + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`http.client.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`http.client.HTTPConnection`. + + :param strict: + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + :class:`http.client.HTTPConnection`. + + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + + :param timeout: + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. + + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to False, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param retries: + Retry configuration to use by default with requests in this pool. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.ProxyManager` + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.ProxyManager` + + :param \\**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. 
+ """ + + scheme = "http" + ConnectionCls = HTTPConnection + ResponseCls = HTTPResponse + + def __init__( + self, + host, + port=None, + strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, + maxsize=1, + block=False, + headers=None, + retries=None, + _proxy=None, + _proxy_headers=None, + _proxy_config=None, + **conn_kw + ): + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) + + self.strict = strict + + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + + if retries is None: + retries = Retry.DEFAULT + + self.timeout = timeout + self.retries = retries + + self.pool = self.QueueCls(maxsize) + self.block = block + + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + self.proxy_config = _proxy_config + + # Fill the queue up so that doing get() on it will block properly + for _ in xrange(maxsize): + self.pool.put(None) + + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + self.conn_kw = conn_kw + + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault("socket_options", []) + + self.conn_kw["proxy"] = self.proxy + self.conn_kw["proxy_config"] = self.proxy_config + + def _new_conn(self): + """ + Return a fresh :class:`HTTPConnection`. + """ + self.num_connections += 1 + log.debug( + "Starting new HTTP connection (%d): %s:%s", + self.num_connections, + self.host, + self.port or "80", + ) + + conn = self.ConnectionCls( + host=self.host, + port=self.port, + timeout=self.timeout.connect_timeout, + strict=self.strict, + **self.conn_kw + ) + return conn + + def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. + + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") + + except queue.Empty: + if self.block: + raise EmptyPoolError( + self, + "Pool reached maximum size and no more connections are allowed.", + ) + pass # Oh well, we'll create a new connection then + + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.debug("Resetting dropped connection: %s", self.host) + conn.close() + if getattr(conn, "auto_open", 1) == 0: + # This is a proxied connection that has been mutated by + # http.client._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None + + return conn or self._new_conn() + + def _put_conn(self, conn): + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. + + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. + + If the pool is closed, then the connection will be closed and discarded. + """ + try: + self.pool.put(conn, block=False) + return # Everything is dandy, done. + except AttributeError: + # self.pool is None. 
+ pass + except queue.Full: + # This should never happen if self.block == True + log.warning( + "Connection pool is full, discarding connection: %s. Connection pool size: %s", + self.host, + self.pool.qsize(), + ) + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + pass + + def _prepare_proxy(self, conn): + # Nothing to do for HTTP connections. + pass + + def _get_timeout(self, timeout): + """Helper that always returns a :class:`urllib3.util.Timeout`""" + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) + + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + + if isinstance(err, SocketTimeout): + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, "errno") and err.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if "timed out" in str(err) or "did not complete (read)" in str( + err + ): # Python < 2.7.4 + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % timeout_value + ) + + def _make_request( + self, conn, method, url, timeout=_Default, chunked=False, **httplib_request_kw + ): + """ + Perform a request on a given urllib connection object taken from our + pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. + """ + self.num_requests += 1 + + timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + + # Trigger any extra validation we need to do. + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise + + # conn.request() calls http.client.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + try: + if chunked: + conn.request_chunked(method, url, **httplib_request_kw) + else: + conn.request(method, url, **httplib_request_kw) + + # We are swallowing BrokenPipeError (errno.EPIPE) since the server is + # legitimately able to close the connection after sending a valid response. + # With this behaviour, the received response is still readable. 
+ except BrokenPipeError: + # Python 3 + pass + except IOError as e: + # Python 2 and macOS/Linux + # EPIPE and ESHUTDOWN are BrokenPipeError on Python 2, and EPROTOTYPE is needed on macOS + # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/ + if e.errno not in { + errno.EPIPE, + errno.ESHUTDOWN, + errno.EPROTOTYPE, + }: + raise + + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + + # App Engine doesn't have a sock attr + if getattr(conn, "sock", None): + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout + ) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) + + # Receive the response from the server + try: + try: + # Python 2.7, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: + # Python 3 + try: + httplib_response = conn.getresponse() + except BaseException as e: + # Remove the TypeError from the exception chain in + # Python 3 (including for exceptions like SystemExit). + # Otherwise it looks like a bug in the code. + six.raise_from(e, None) + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) + raise + + # AppEngine doesn't have a version attr. + http_version = getattr(conn, "_http_vsn_str", "HTTP/?") + log.debug( + '%s://%s:%s "%s %s %s" %s %s', + self.scheme, + self.host, + self.port, + method, + url, + http_version, + httplib_response.status, + httplib_response.length, + ) + + try: + assert_header_parsing(httplib_response.msg) + except (HeaderParsingError, TypeError) as hpe: # Platform-specific: Python 3 + log.warning( + "Failed to parse headers (url=%s): %s", + self._absolute_url(url), + hpe, + exc_info=True, + ) + + return httplib_response + + def _absolute_url(self, path): + return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + if self.pool is None: + return + # Disable access to the pool + old_pool, self.pool = self.pool, None + + try: + while True: + conn = old_pool.get(block=False) + if conn: + conn.close() + + except queue.Empty: + pass # Done. + + def is_same_host(self, url): + """ + Check if the given ``url`` is a member of the same host as this + connection pool. + """ + if url.startswith("/"): + return True + + # TODO: Add optional support for socket.gethostbyname checking. 
+ scheme, host, port = get_host(url) + if host is not None: + host = _normalize_host(host, scheme=scheme) + + # Use explicit default port for comparison when none is given + if self.port and not port: + port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None + + return (scheme, host, port) == (self.scheme, self.host, self.port) + + def urlopen( + self, + method, + url, + body=None, + headers=None, + retries=None, + redirect=True, + assert_same_host=True, + timeout=_Default, + pool_timeout=None, + release_conn=None, + chunked=False, + body_pos=None, + **response_kw + ): + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method provided + by :class:`.RequestMethods`, such as :meth:`request`. + + .. note:: + + `release_conn` will only behave as expected if + `preload_content=False` because we want to make + `preload_content=False` the default behaviour someday soon without + breaking backwards compatibility. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param body: + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param redirect: + If True, automatically handle redirects (status codes 301, 302, + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. + + :param assert_same_host: + If ``True``, will make sure that the host of the pool requests is + consistent else will raise HostChangedError. When ``False``, you can + use the pool on an HTTP proxy and request foreign hosts. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param release_conn: + If False, then the urlopen call will not release the connection + back into the pool once a response is received (but will release if + you read the entire contents of the response such as when + `preload_content=True`). This is useful if you're not preloading + the response's content immediately. 
You will need to call + ``r.release_conn()`` on the response ``r`` to return the connection + back into the pool. If None, it takes the value of + ``response_kw.get('preload_content', True)``. + + :param chunked: + If True, urllib3 will send the body using chunked transfer + encoding. Otherwise, urllib3 will send the body using the standard + content-length form. Defaults to False. + + :param int body_pos: + Position to seek to in file-like body in the event of a retry or + redirect. Typically this won't need to be set because urllib3 will + auto-populate the value when needed. + + :param \**response_kw: + Additional parameters are passed to + :meth:`urllib3.response.HTTPResponse.from_httplib` + """ + + parsed_url = parse_url(url) + destination_scheme = parsed_url.scheme + + if headers is None: + headers = self.headers + + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) + + if release_conn is None: + release_conn = response_kw.get("preload_content", True) + + # Check host + if assert_same_host and not self.is_same_host(url): + raise HostChangedError(self, url, retries) + + # Ensure that the URL we're connecting to is properly encoded + if url.startswith("/"): + url = six.ensure_str(_encode_target(url)) + else: + url = six.ensure_str(parsed_url.url) + + conn = None + + # Track whether `conn` needs to be released before + # returning/raising/recursing. Update this variable if necessary, and + # leave `release_conn` constant throughout the function. That way, if + # the function recurses, the original value of `release_conn` will be + # passed down into the recursive call, and its value will be respected. + # + # See issue #651 [1] for details. + # + # [1] <https://github.com/urllib3/urllib3/issues/651> + release_this_conn = release_conn + + http_tunnel_required = connection_requires_http_tunnel( + self.proxy, self.proxy_config, destination_scheme + ) + + # Merge the proxy headers. Only done when not using HTTP CONNECT. We + # have to copy the headers dict so we can safely change it without those + # changes being reflected in anyone else's copy. + if not http_tunnel_required: + headers = headers.copy() + headers.update(self.proxy_headers) + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + + # Keep track of whether we cleanly exited the except block. This + # ensures we do proper cleanup in finally. + clean_exit = False + + # Rewind body position, if needed. Record current position + # for future rewinds in the event of a redirect/retry. + body_pos = set_file_position(body, body_pos) + + try: + # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) + conn = self._get_conn(timeout=pool_timeout) + + conn.timeout = timeout_obj.connect_timeout + + is_new_proxy_conn = self.proxy is not None and not getattr( + conn, "sock", None + ) + if is_new_proxy_conn and http_tunnel_required: + self._prepare_proxy(conn) + + # Make the request on the httplib connection object. + httplib_response = self._make_request( + conn, + method, + url, + timeout=timeout_obj, + body=body, + headers=headers, + chunked=chunked, + ) + + # If we're going to release the connection in ``finally:``, then + # the response doesn't need to know about the connection. Otherwise + # it will also try to release it and we'll have a double-release + # mess.
+ response_conn = conn if not release_conn else None + + # Pass method to Response for length checking + response_kw["request_method"] = method + + # Import httplib's response into our own wrapper object + response = self.ResponseCls.from_httplib( + httplib_response, + pool=self, + connection=response_conn, + retries=retries, + **response_kw + ) + + # Everything went great! + clean_exit = True + + except EmptyPoolError: + # Didn't get a connection from the pool, no need to clean up + clean_exit = True + release_this_conn = False + raise + + except ( + TimeoutError, + HTTPException, + SocketError, + ProtocolError, + BaseSSLError, + SSLError, + CertificateError, + ) as e: + # Discard the connection for these exceptions. It will be + # replaced during the next _get_conn() call. + clean_exit = False + + def _is_ssl_error_message_from_http_proxy(ssl_error): + # We're trying to detect the message 'WRONG_VERSION_NUMBER' but + # SSLErrors are kinda all over the place when it comes to the message, + # so we try to cover our bases here! + message = " ".join(re.split("[^a-z]", str(ssl_error).lower())) + return ( + "wrong version number" in message or "unknown protocol" in message + ) + + # Try to detect a common user error with proxies which is to + # set an HTTP proxy to be HTTPS when it should be 'http://' + # (ie {'http': 'http://proxy', 'https': 'https://proxy'}) + # Instead we add a nice error message and point to a URL. + if ( + isinstance(e, BaseSSLError) + and self.proxy + and _is_ssl_error_message_from_http_proxy(e) + ): + e = ProxyError( + "Your proxy appears to only use HTTP and not HTTPS, " + "try changing your proxy URL to be HTTP. See: " + "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html" + "#https-proxy-error-http-proxy", + SSLError(e), + ) + elif isinstance(e, (BaseSSLError, CertificateError)): + e = SSLError(e) + elif isinstance(e, (SocketError, NewConnectionError)) and self.proxy: + e = ProxyError("Cannot connect to proxy.", e) + elif isinstance(e, (SocketError, HTTPException)): + e = ProtocolError("Connection aborted.", e) + + retries = retries.increment( + method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2] + ) + retries.sleep() + + # Keep track of the error for the retry warning. + err = e + + finally: + if not clean_exit: + # We hit some kind of exception, handled or otherwise. We need + # to throw the connection away unless explicitly told not to. + # Close the connection, set the variable to None, and make sure + # we put the None back in the pool to avoid leaking it. + conn = conn and conn.close() + release_this_conn = True + + if release_this_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. + self._put_conn(conn) + + if not conn: + # Try again + log.warning( + "Retrying (%r) after connection broken by '%r': %s", retries, err, url + ) + return self.urlopen( + method, + url, + body, + headers, + retries, + redirect, + assert_same_host, + timeout=timeout, + pool_timeout=pool_timeout, + release_conn=release_conn, + chunked=chunked, + body_pos=body_pos, + **response_kw + ) + + # Handle redirect? 
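+ # (Descriptive aside: response.get_redirect_location() returns the + # Location header value for 3xx responses and a falsy value otherwise, + # so the ``redirect and ...`` below short-circuits when redirects are + # disabled.)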
+ redirect_location = redirect and response.get_redirect_location() + if redirect_location: + if response.status == 303: + method = "GET" + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + response.drain_conn() + raise + return response + + response.drain_conn() + retries.sleep_for_retry(response) + log.debug("Redirecting %s -> %s", url, redirect_location) + return self.urlopen( + method, + redirect_location, + body, + headers, + retries=retries, + redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, + pool_timeout=pool_timeout, + release_conn=release_conn, + chunked=chunked, + body_pos=body_pos, + **response_kw + ) + + # Check if we should retry the HTTP response. + has_retry_after = bool(response.getheader("Retry-After")) + if retries.is_retry(method, response.status, has_retry_after): + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_status: + response.drain_conn() + raise + return response + + response.drain_conn() + retries.sleep(response) + log.debug("Retry: %s", url) + return self.urlopen( + method, + url, + body, + headers, + retries=retries, + redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, + pool_timeout=pool_timeout, + release_conn=release_conn, + chunked=chunked, + body_pos=body_pos, + **response_kw + ) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + :class:`.HTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``, + ``ca_cert_dir``, ``ssl_version``, ``key_password`` are only used if :mod:`ssl` + is available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade + the connection socket into an SSL socket. + """ + + scheme = "https" + ConnectionCls = HTTPSConnection + + def __init__( + self, + host, + port=None, + strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, + maxsize=1, + block=False, + headers=None, + retries=None, + _proxy=None, + _proxy_headers=None, + key_file=None, + cert_file=None, + cert_reqs=None, + key_password=None, + ca_certs=None, + ssl_version=None, + assert_hostname=None, + assert_fingerprint=None, + ca_cert_dir=None, + **conn_kw + ): + + HTTPConnectionPool.__init__( + self, + host, + port, + strict, + timeout, + maxsize, + block, + headers, + retries, + _proxy, + _proxy_headers, + **conn_kw + ) + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.key_password = key_password + self.ca_certs = ca_certs + self.ca_cert_dir = ca_cert_dir + self.ssl_version = ssl_version + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def _prepare_conn(self, conn): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. 
+ """ + + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert( + key_file=self.key_file, + key_password=self.key_password, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint, + ) + conn.ssl_version = self.ssl_version + return conn + + def _prepare_proxy(self, conn): + """ + Establishes a tunnel connection through HTTP CONNECT. + + Tunnel connection is established early because otherwise httplib would + improperly set Host: header to proxy's IP:port. + """ + + conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers) + + if self.proxy.scheme == "https": + conn.tls_in_tls_required = True + + conn.connect() + + def _new_conn(self): + """ + Return a fresh :class:`http.client.HTTPSConnection`. + """ + self.num_connections += 1 + log.debug( + "Starting new HTTPS connection (%d): %s:%s", + self.num_connections, + self.host, + self.port or "443", + ) + + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + raise SSLError( + "Can't connect to HTTPS URL because the SSL module is not available." + ) + + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + + conn = self.ConnectionCls( + host=actual_host, + port=actual_port, + timeout=self.timeout.connect_timeout, + strict=self.strict, + cert_file=self.cert_file, + key_file=self.key_file, + key_password=self.key_password, + **self.conn_kw + ) + + return self._prepare_conn(conn) + + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + super(HTTPSConnectionPool, self)._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not getattr(conn, "sock", None): # AppEngine might not have `.sock` + conn.connect() + + if not conn.is_verified: + warnings.warn( + ( + "Unverified HTTPS request is being made to host '%s'. " + "Adding certificate verification is strongly advised. See: " + "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html" + "#ssl-warnings" % conn.host + ), + InsecureRequestWarning, + ) + + if getattr(conn, "proxy_is_verified", None) is False: + warnings.warn( + ( + "Unverified HTTPS connection done to an HTTPS proxy. " + "Adding certificate verification is strongly advised. See: " + "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html" + "#ssl-warnings" + ), + InsecureRequestWarning, + ) + + +def connection_from_url(url, **kw): + """ + Given a url, return an :class:`.ConnectionPool` instance of its host. + + This is a shortcut for not having to parse out the scheme, host, and port + of the url before creating an :class:`.ConnectionPool` instance. + + :param url: + Absolute URL string that must include the scheme. Port is optional. + + :param \\**kw: + Passes additional parameters to the constructor of the appropriate + :class:`.ConnectionPool`. Useful for specifying things like + timeout, maxsize, headers, etc. + + Example:: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, host, port = get_host(url) + port = port or port_by_scheme.get(scheme, 80) + if scheme == "https": + return HTTPSConnectionPool(host, port=port, **kw) + else: + return HTTPConnectionPool(host, port=port, **kw) + + +def _normalize_host(host, scheme): + """ + Normalize hosts for comparisons and use with sockets. 
+ """ + + host = normalize_host(host, scheme) + + # httplib doesn't like it when we include brackets in IPv6 addresses + # Specifically, if we include brackets but also pass the port then + # httplib crazily doubles up the square brackets on the Host header. + # Instead, we need to make sure we never pass ``None`` as the port. + # However, for backward compatibility reasons we can't actually + # *assert* that. See http://bugs.python.org/issue28539 + if host.startswith("[") and host.endswith("]"): + host = host[1:-1] + return host diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/exceptions.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..cba6f3f560f71b3b15ab6aaf21dde4f1bba1bd00 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/exceptions.py @@ -0,0 +1,323 @@ +from __future__ import absolute_import + +from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead + +# Base Exceptions + + +class HTTPError(Exception): + """Base exception used by this module.""" + + pass + + +class HTTPWarning(Warning): + """Base warning used by this module.""" + + pass + + +class PoolError(HTTPError): + """Base exception for errors caused within a pool.""" + + def __init__(self, pool, message): + self.pool = pool + HTTPError.__init__(self, "%s: %s" % (pool, message)) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, None) + + +class RequestError(PoolError): + """Base exception for PoolErrors that have associated URLs.""" + + def __init__(self, pool, url, message): + self.url = url + PoolError.__init__(self, pool, message) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url, None) + + +class SSLError(HTTPError): + """Raised when SSL certificate fails in an HTTPS connection.""" + + pass + + +class ProxyError(HTTPError): + """Raised when the connection to a proxy fails.""" + + def __init__(self, message, error, *args): + super(ProxyError, self).__init__(message, error, *args) + self.original_error = error + + +class DecodeError(HTTPError): + """Raised when automatic decoding based on Content-Type fails.""" + + pass + + +class ProtocolError(HTTPError): + """Raised when something unexpected happens mid-request/response.""" + + pass + + +#: Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + +# Leaf Exceptions + + +class MaxRetryError(RequestError): + """Raised when the maximum number of retries is exceeded. 
+
+    :param pool: The connection pool
+    :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
+    :param string url: The requested URL
+    :param exceptions.Exception reason: The underlying error
+
+    """
+
+    def __init__(self, pool, url, reason=None):
+        self.reason = reason
+
+        message = "Max retries exceeded with url: %s (Caused by %r)" % (url, reason)
+
+        RequestError.__init__(self, pool, url, message)
+
+
+class HostChangedError(RequestError):
+    """Raised when an existing pool gets a request for a foreign host."""
+
+    def __init__(self, pool, url, retries=3):
+        message = "Tried to open a foreign host with url: %s" % url
+        RequestError.__init__(self, pool, url, message)
+        self.retries = retries
+
+
+class TimeoutStateError(HTTPError):
+    """Raised when passing an invalid state to a timeout"""
+
+    pass
+
+
+class TimeoutError(HTTPError):
+    """Raised when a socket timeout error occurs.
+
+    Catching this error will catch both :exc:`ReadTimeoutErrors
+    <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
+    """
+
+    pass
+
+
+class ReadTimeoutError(TimeoutError, RequestError):
+    """Raised when a socket timeout occurs while receiving data from a server"""
+
+    pass
+
+
+# This timeout error does not have a URL attached and needs to inherit from the
+# base HTTPError
+class ConnectTimeoutError(TimeoutError):
+    """Raised when a socket timeout occurs while connecting to a server"""
+
+    pass
+
+
+class NewConnectionError(ConnectTimeoutError, PoolError):
+    """Raised when we fail to establish a new connection. Usually ECONNREFUSED."""
+
+    pass
+
+
+class EmptyPoolError(PoolError):
+    """Raised when a pool runs out of connections and no more are allowed."""
+
+    pass
+
+
+class ClosedPoolError(PoolError):
+    """Raised when a request enters a pool after the pool has been closed."""
+
+    pass
+
+
+class LocationValueError(ValueError, HTTPError):
+    """Raised when there is something wrong with a given URL input."""
+
+    pass
+
+
+class LocationParseError(LocationValueError):
+    """Raised when get_host or similar fails to parse the URL input."""
+
+    def __init__(self, location):
+        message = "Failed to parse: %s" % location
+        HTTPError.__init__(self, message)
+
+        self.location = location
+
+
+class URLSchemeUnknown(LocationValueError):
+    """Raised when a URL input has an unsupported scheme."""
+
+    def __init__(self, scheme):
+        message = "Not supported URL scheme %s" % scheme
+        super(URLSchemeUnknown, self).__init__(message)
+
+        self.scheme = scheme
+
+
+class ResponseError(HTTPError):
+    """Used as a container for an error reason supplied in a MaxRetryError."""
+
+    GENERIC_ERROR = "too many error responses"
+    SPECIFIC_ERROR = "too many {status_code} error responses"
+
+
+class SecurityWarning(HTTPWarning):
+    """Warned when performing security-reducing actions"""
+
+    pass
+
+
+class SubjectAltNameWarning(SecurityWarning):
+    """Warned when connecting to a host with a certificate missing a SAN."""
+
+    pass
+
+
+class InsecureRequestWarning(SecurityWarning):
+    """Warned when making an unverified HTTPS request."""
+
+    pass
+
+
+class SystemTimeWarning(SecurityWarning):
+    """Warned when system time is suspected to be wrong"""
+
+    pass
+
+
+class InsecurePlatformWarning(SecurityWarning):
+    """Warned when certain TLS/SSL configuration is not available on a platform."""
+
+    pass
+
+
+class SNIMissingWarning(HTTPWarning):
+    """Warned when making an HTTPS request without SNI available."""
+
+    pass
+
+
+class DependencyWarning(HTTPWarning):
+    """
+    Warned when an attempt is made to import a module with missing optional
+    dependencies.
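The warning classes above are ordinary Python ``Warning`` subclasses, so they can be filtered with the stdlib machinery or with ``urllib3.disable_warnings()``; a sketch, assuming a deliberately unverified connection::

    import urllib3
    from urllib3.exceptions import InsecureRequestWarning

    urllib3.disable_warnings(InsecureRequestWarning)
    # cert_reqs="CERT_NONE" disables verification, which would otherwise
    # trigger InsecureRequestWarning on every request
    http = urllib3.PoolManager(cert_reqs="CERT_NONE")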
+ """ + + pass + + +class ResponseNotChunked(ProtocolError, ValueError): + """Response needs to be chunked in order to read it as chunks.""" + + pass + + +class BodyNotHttplibCompatible(HTTPError): + """ + Body should be :class:`http.client.HTTPResponse` like + (have an fp attribute which returns raw chunks) for read_chunked(). + """ + + pass + + +class IncompleteRead(HTTPError, httplib_IncompleteRead): + """ + Response length doesn't match expected Content-Length + + Subclass of :class:`http.client.IncompleteRead` to allow int value + for ``partial`` to avoid creating large objects on streamed reads. + """ + + def __init__(self, partial, expected): + super(IncompleteRead, self).__init__(partial, expected) + + def __repr__(self): + return "IncompleteRead(%i bytes read, %i more expected)" % ( + self.partial, + self.expected, + ) + + +class InvalidChunkLength(HTTPError, httplib_IncompleteRead): + """Invalid chunk length in a chunked response.""" + + def __init__(self, response, length): + super(InvalidChunkLength, self).__init__( + response.tell(), response.length_remaining + ) + self.response = response + self.length = length + + def __repr__(self): + return "InvalidChunkLength(got length %r, %i bytes read)" % ( + self.length, + self.partial, + ) + + +class InvalidHeader(HTTPError): + """The header provided was somehow invalid.""" + + pass + + +class ProxySchemeUnknown(AssertionError, URLSchemeUnknown): + """ProxyManager does not support the supplied scheme""" + + # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. + + def __init__(self, scheme): + # 'localhost' is here because our URL parser parses + # localhost:8080 -> scheme=localhost, remove if we fix this. + if scheme == "localhost": + scheme = None + if scheme is None: + message = "Proxy URL had no scheme, should start with http:// or https://" + else: + message = ( + "Proxy URL had unsupported scheme %s, should use http:// or https://" + % scheme + ) + super(ProxySchemeUnknown, self).__init__(message) + + +class ProxySchemeUnsupported(ValueError): + """Fetching HTTPS resources through HTTPS proxies is unsupported""" + + pass + + +class HeaderParsingError(HTTPError): + """Raised by assert_header_parsing, but we convert it to a log.warning statement.""" + + def __init__(self, defects, unparsed_data): + message = "%s, unparsed data: %r" % (defects or "Unknown", unparsed_data) + super(HeaderParsingError, self).__init__(message) + + +class UnrewindableBodyError(HTTPError): + """urllib3 encountered an error when trying to rewind a body""" + + pass diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/fields.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/fields.py new file mode 100644 index 0000000000000000000000000000000000000000..9d630f491d9a39644ae65564dac88eb51f0bbe78 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/fields.py @@ -0,0 +1,274 @@ +from __future__ import absolute_import + +import email.utils +import mimetypes +import re + +from .packages import six + + +def guess_content_type(filename, default="application/octet-stream"): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetypes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. 
+ """ + if filename: + return mimetypes.guess_type(filename)[0] or default + return default + + +def format_header_param_rfc2231(name, value): + """ + Helper function to format and quote a single header parameter using the + strategy defined in RFC 2231. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows + `RFC 2388 Section 4.4 `_. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as ``bytes`` or `str``. + :ret: + An RFC-2231-formatted unicode string. + """ + if isinstance(value, six.binary_type): + value = value.decode("utf-8") + + if not any(ch in value for ch in '"\\\r\n'): + result = u'%s="%s"' % (name, value) + try: + result.encode("ascii") + except (UnicodeEncodeError, UnicodeDecodeError): + pass + else: + return result + + if six.PY2: # Python 2: + value = value.encode("utf-8") + + # encode_rfc2231 accepts an encoded string and returns an ascii-encoded + # string in Python 2 but accepts and returns unicode strings in Python 3 + value = email.utils.encode_rfc2231(value, "utf-8") + value = "%s*=%s" % (name, value) + + if six.PY2: # Python 2: + value = value.decode("utf-8") + + return value + + +_HTML5_REPLACEMENTS = { + u"\u0022": u"%22", + # Replace "\" with "\\". + u"\u005C": u"\u005C\u005C", +} + +# All control characters from 0x00 to 0x1F *except* 0x1B. +_HTML5_REPLACEMENTS.update( + { + six.unichr(cc): u"%{:02X}".format(cc) + for cc in range(0x00, 0x1F + 1) + if cc not in (0x1B,) + } +) + + +def _replace_multiple(value, needles_and_replacements): + def replacer(match): + return needles_and_replacements[match.group(0)] + + pattern = re.compile( + r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()]) + ) + + result = pattern.sub(replacer, value) + + return result + + +def format_header_param_html5(name, value): + """ + Helper function to format and quote a single header parameter using the + HTML5 strategy. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows the `HTML5 Working Draft + Section 4.10.22.7`_ and matches the behavior of curl and modern browsers. + + .. _HTML5 Working Draft Section 4.10.22.7: + https://w3c.github.io/html/sec-forms.html#multipart-form-data + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as ``bytes`` or `str``. + :ret: + A unicode string, stripped of troublesome characters. + """ + if isinstance(value, six.binary_type): + value = value.decode("utf-8") + + value = _replace_multiple(value, _HTML5_REPLACEMENTS) + + return u'%s="%s"' % (name, value) + + +# For backwards-compatibility. +format_header_param = format_header_param_html5 + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. Must be unicode. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. Must be unicode. + :param headers: + An optional dict-like object of headers to initially use for the field. + :param header_formatter: + An optional callable that is used to encode and format the headers. By + default, this is :func:`format_header_param_html5`. 
+ """ + + def __init__( + self, + name, + data, + filename=None, + headers=None, + header_formatter=format_header_param_html5, + ): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + self.header_formatter = header_formatter + + @classmethod + def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example:: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. + """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + + request_param = cls( + fieldname, data, filename=filename, header_formatter=header_formatter + ) + request_param.make_multipart(content_type=content_type) + + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. By + default, this calls ``self.header_formatter``. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + + return self.header_formatter(name, value) + + def _render_parts(self, header_parts): + """ + Helper function to format and quote a single header. + + Useful for single headers that are composed of multiple items. E.g., + 'Content-Disposition' fields. + + :param header_parts: + A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. + """ + parts = [] + iterable = header_parts + if isinstance(header_parts, dict): + iterable = header_parts.items() + + for name, value in iterable: + if value is not None: + parts.append(self._render_part(name, value)) + + return u"; ".join(parts) + + def render_headers(self): + """ + Renders the headers for this request field. + """ + lines = [] + + sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"] + for sort_key in sort_keys: + if self.headers.get(sort_key, False): + lines.append(u"%s: %s" % (sort_key, self.headers[sort_key])) + + for header_name, header_value in self.headers.items(): + if header_name not in sort_keys: + if header_value: + lines.append(u"%s: %s" % (header_name, header_value)) + + lines.append(u"\r\n") + return u"\r\n".join(lines) + + def make_multipart( + self, content_disposition=None, content_type=None, content_location=None + ): + """ + Makes this request field into a multipart request field. + + This method overrides "Content-Disposition", "Content-Type" and + "Content-Location" headers to the request parameter. + + :param content_type: + The 'Content-Type' of the request body. + :param content_location: + The 'Content-Location' of the request body. 
+ + """ + self.headers["Content-Disposition"] = content_disposition or u"form-data" + self.headers["Content-Disposition"] += u"; ".join( + [ + u"", + self._render_parts( + ((u"name", self._name), (u"filename", self._filename)) + ), + ] + ) + self.headers["Content-Type"] = content_type + self.headers["Content-Location"] = content_location diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/filepost.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/filepost.py new file mode 100644 index 0000000000000000000000000000000000000000..36c9252c647e67bc7353c523152568b993c1331f --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/filepost.py @@ -0,0 +1,98 @@ +from __future__ import absolute_import + +import binascii +import codecs +import os +from io import BytesIO + +from .fields import RequestField +from .packages import six +from .packages.six import b + +writer = codecs.lookup("utf-8")[3] + + +def choose_boundary(): + """ + Our embarrassingly-simple replacement for mimetools.choose_boundary. + """ + boundary = binascii.hexlify(os.urandom(16)) + if not six.PY2: + boundary = boundary.decode("ascii") + return boundary + + +def iter_field_objects(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts, and lists of + :class:`~urllib3.fields.RequestField`. + + """ + if isinstance(fields, dict): + i = six.iteritems(fields) + else: + i = iter(fields) + + for field in i: + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) + + +def iter_fields(fields): + """ + .. deprecated:: 1.6 + + Iterate over fields. + + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. + + Supports list of (k, v) tuples and dicts. + """ + if isinstance(fields, dict): + return ((k, v) for k, v in six.iteritems(fields)) + + return ((k, v) for k, v in fields) + + +def encode_multipart_formdata(fields, boundary=None): + """ + Encode a dictionary of ``fields`` using the multipart/form-data MIME format. + + :param fields: + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). + + :param boundary: + If not specified, then a random boundary will be generated using + :func:`urllib3.filepost.choose_boundary`. 
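A sketch of ``encode_multipart_formdata`` on a mixed dict of a plain field and a filetuple (the names and bytes are placeholders)::

    from urllib3.filepost import encode_multipart_formdata

    body, content_type = encode_multipart_formdata(
        {"name": "alice", "avatar": ("a.png", b"\x89PNG...", "image/png")}
    )
    print(content_type)            # multipart/form-data; boundary=<random hex>
    assert body.startswith(b"--")  # body opens with the boundary delimiter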
+ """ + body = BytesIO() + if boundary is None: + boundary = choose_boundary() + + for field in iter_field_objects(fields): + body.write(b("--%s\r\n" % (boundary))) + + writer(body).write(field.render_headers()) + data = field.data + + if isinstance(data, int): + data = str(data) # Backwards compatibility + + if isinstance(data, six.text_type): + writer(body).write(data) + else: + body.write(data) + + body.write(b"\r\n") + + body.write(b("--%s--\r\n" % (boundary))) + + content_type = str("multipart/form-data; boundary=%s" % boundary) + + return body.getvalue(), content_type diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/poolmanager.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/poolmanager.py new file mode 100644 index 0000000000000000000000000000000000000000..ca4ec341184adb3d30f3cd825b49a81b87d29b08 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/poolmanager.py @@ -0,0 +1,537 @@ +from __future__ import absolute_import + +import collections +import functools +import logging + +from ._collections import RecentlyUsedContainer +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme +from .exceptions import ( + LocationValueError, + MaxRetryError, + ProxySchemeUnknown, + ProxySchemeUnsupported, + URLSchemeUnknown, +) +from .packages import six +from .packages.six.moves.urllib.parse import urljoin +from .request import RequestMethods +from .util.proxy import connection_requires_http_tunnel +from .util.retry import Retry +from .util.url import parse_url + +__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] + + +log = logging.getLogger(__name__) + +SSL_KEYWORDS = ( + "key_file", + "cert_file", + "cert_reqs", + "ca_certs", + "ssl_version", + "ca_cert_dir", + "ssl_context", + "key_password", + "server_hostname", +) + +# All known keyword arguments that could be provided to the pool manager, its +# pools, or the underlying connections. This is used to construct a pool key. +_key_fields = ( + "key_scheme", # str + "key_host", # str + "key_port", # int + "key_timeout", # int or float or Timeout + "key_retries", # int or Retry + "key_strict", # bool + "key_block", # bool + "key_source_address", # str + "key_key_file", # str + "key_key_password", # str + "key_cert_file", # str + "key_cert_reqs", # str + "key_ca_certs", # str + "key_ssl_version", # str + "key_ca_cert_dir", # str + "key_ssl_context", # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext + "key_maxsize", # int + "key_headers", # dict + "key__proxy", # parsed proxy url + "key__proxy_headers", # dict + "key__proxy_config", # class + "key_socket_options", # list of (level (int), optname (int), value (int or str)) tuples + "key__socks_options", # dict + "key_assert_hostname", # bool or string + "key_assert_fingerprint", # str + "key_server_hostname", # str +) + +#: The namedtuple class used to construct keys for the connection pool. +#: All custom key schemes should include the fields in this key at a minimum. +PoolKey = collections.namedtuple("PoolKey", _key_fields) + +_proxy_config_fields = ("ssl_context", "use_forwarding_for_https") +ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields) + + +def _default_key_normalizer(key_class, request_context): + """ + Create a pool key out of a request context dictionary. + + According to RFC 3986, both the scheme and host are case-insensitive. 
+ Therefore, this function normalizes both before constructing the pool + key for an HTTPS request. If you wish to change this behaviour, provide + alternate callables to ``key_fn_by_scheme``. + + :param key_class: + The class to use when constructing the key. This should be a namedtuple + with the ``scheme`` and ``host`` keys at a minimum. + :type key_class: namedtuple + :param request_context: + A dictionary-like object that contain the context for a request. + :type request_context: dict + + :return: A namedtuple that can be used as a connection pool key. + :rtype: PoolKey + """ + # Since we mutate the dictionary, make a copy first + context = request_context.copy() + context["scheme"] = context["scheme"].lower() + context["host"] = context["host"].lower() + + # These are both dictionaries and need to be transformed into frozensets + for key in ("headers", "_proxy_headers", "_socks_options"): + if key in context and context[key] is not None: + context[key] = frozenset(context[key].items()) + + # The socket_options key may be a list and needs to be transformed into a + # tuple. + socket_opts = context.get("socket_options") + if socket_opts is not None: + context["socket_options"] = tuple(socket_opts) + + # Map the kwargs to the names in the namedtuple - this is necessary since + # namedtuples can't have fields starting with '_'. + for key in list(context.keys()): + context["key_" + key] = context.pop(key) + + # Default to ``None`` for keys missing from the context + for field in key_class._fields: + if field not in context: + context[field] = None + + return key_class(**context) + + +#: A dictionary that maps a scheme to a callable that creates a pool key. +#: This can be used to alter the way pool keys are constructed, if desired. +#: Each PoolManager makes a copy of this dictionary so they can be configured +#: globally here, or individually on the instance. +key_fn_by_scheme = { + "http": functools.partial(_default_key_normalizer, PoolKey), + "https": functools.partial(_default_key_normalizer, PoolKey), +} + +pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool} + + +class PoolManager(RequestMethods): + """ + Allows for arbitrary requests while transparently keeping track of + necessary connection pools for you. + + :param num_pools: + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param \\**connection_pool_kw: + Additional parameters are used to create fresh + :class:`urllib3.connectionpool.ConnectionPool` instances. + + Example:: + + >>> manager = PoolManager(num_pools=2) + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') + >>> len(manager.pools) + 2 + + """ + + proxy = None + proxy_config = None + + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): + RequestMethods.__init__(self, headers) + self.connection_pool_kw = connection_pool_kw + self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + + # Locally set the pool classes and keys so other PoolManagers can + # override them. 
+ self.pool_classes_by_scheme = pool_classes_by_scheme + self.key_fn_by_scheme = key_fn_by_scheme.copy() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + + def _new_pool(self, scheme, host, port, request_context=None): + """ + Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and + any additional pool keyword arguments. + + If ``request_context`` is provided, it is provided as keyword arguments + to the pool class used. This method is used to actually create the + connection pools handed out by :meth:`connection_from_url` and + companion methods. It is intended to be overridden for customization. + """ + pool_cls = self.pool_classes_by_scheme[scheme] + if request_context is None: + request_context = self.connection_pool_kw.copy() + + # Although the context has everything necessary to create the pool, + # this function has historically only used the scheme, host, and port + # in the positional args. When an API change is acceptable these can + # be removed. + for key in ("scheme", "host", "port"): + request_context.pop(key, None) + + if scheme == "http": + for kw in SSL_KEYWORDS: + request_context.pop(kw, None) + + return pool_cls(host, port, **request_context) + + def clear(self): + """ + Empty our store of pools and direct them all to close. + + This will not affect in-flight connections, but they will not be + re-used after completion. + """ + self.pools.clear() + + def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): + """ + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme. + + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is + provided, it is merged with the instance's ``connection_pool_kw`` + variable and used to create the new connection pool, if one is + needed. + """ + + if not host: + raise LocationValueError("No host specified.") + + request_context = self._merge_pool_kwargs(pool_kwargs) + request_context["scheme"] = scheme or "http" + if not port: + port = port_by_scheme.get(request_context["scheme"].lower(), 80) + request_context["port"] = port + request_context["host"] = host + + return self.connection_from_context(request_context) + + def connection_from_context(self, request_context): + """ + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context. + + ``request_context`` must at least contain the ``scheme`` key and its + value must be a key in ``key_fn_by_scheme`` instance variable. + """ + scheme = request_context["scheme"].lower() + pool_key_constructor = self.key_fn_by_scheme.get(scheme) + if not pool_key_constructor: + raise URLSchemeUnknown(scheme) + pool_key = pool_key_constructor(request_context) + + return self.connection_from_pool_key(pool_key, request_context=request_context) + + def connection_from_pool_key(self, pool_key, request_context=None): + """ + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key. + + ``pool_key`` should be a namedtuple that only contains immutable + objects. At a minimum it must have the ``scheme``, ``host``, and + ``port`` fields. + """ + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. 
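A sketch of the pooling behaviour this enables: two lookups for the same (scheme, host, port) key share one cached pool; ``example.org`` is a placeholder::

    from urllib3 import PoolManager

    pm = PoolManager(num_pools=4, maxsize=2)
    a = pm.connection_from_host("example.org", scheme="https")
    b = pm.connection_from_url("https://example.org/index.html")
    assert a is b   # same PoolKey, same cached HTTPSConnectionPool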
+ pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + scheme = request_context["scheme"] + host = request_context["host"] + port = request_context["port"] + pool = self._new_pool(scheme, host, port, request_context=request_context) + self.pools[pool_key] = pool + + return pool + + def connection_from_url(self, url, pool_kwargs=None): + """ + Similar to :func:`urllib3.connectionpool.connection_from_url`. + + If ``pool_kwargs`` is not provided and a new pool needs to be + constructed, ``self.connection_pool_kw`` is used to initialize + the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs`` + is provided, it is used instead. Note that if a new pool does not + need to be created for the request, the provided ``pool_kwargs`` are + not used. + """ + u = parse_url(url) + return self.connection_from_host( + u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs + ) + + def _merge_pool_kwargs(self, override): + """ + Merge a dictionary of override values for self.connection_pool_kw. + + This does not modify self.connection_pool_kw and returns a new dict. + Any keys in the override dictionary with a value of ``None`` are + removed from the merged dictionary. + """ + base_pool_kwargs = self.connection_pool_kw.copy() + if override: + for key, value in override.items(): + if value is None: + try: + del base_pool_kwargs[key] + except KeyError: + pass + else: + base_pool_kwargs[key] = value + return base_pool_kwargs + + def _proxy_requires_url_absolute_form(self, parsed_url): + """ + Indicates if the proxy requires the complete destination URL in the + request. Normally this is only needed when not using an HTTP CONNECT + tunnel. + """ + if self.proxy is None: + return False + + return not connection_requires_http_tunnel( + self.proxy, self.proxy_config, parsed_url.scheme + ) + + def _validate_proxy_scheme_url_selection(self, url_scheme): + """ + Validates that were not attempting to do TLS in TLS connections on + Python2 or with unsupported SSL implementations. + """ + if self.proxy is None or url_scheme != "https": + return + + if self.proxy.scheme != "https": + return + + if six.PY2 and not self.proxy_config.use_forwarding_for_https: + raise ProxySchemeUnsupported( + "Contacting HTTPS destinations through HTTPS proxies " + "'via CONNECT tunnels' is not supported in Python 2" + ) + + def urlopen(self, method, url, redirect=True, **kw): + """ + Same as :meth:`urllib3.HTTPConnectionPool.urlopen` + with custom cross-host redirect logic and only sends the request-uri + portion of the ``url``. + + The given ``url`` parameter must be absolute, such that an appropriate + :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. + """ + u = parse_url(url) + self._validate_proxy_scheme_url_selection(u.scheme) + + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + + kw["assert_same_host"] = False + kw["redirect"] = False + + if "headers" not in kw: + kw["headers"] = self.headers.copy() + + if self._proxy_requires_url_absolute_form(u): + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) + + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + # Support relative URLs for redirecting. 
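A sketch of how this redirect path is typically driven from user code; the URL and header value are placeholders::

    from urllib3 import PoolManager
    from urllib3.util.retry import Retry

    pm = PoolManager()
    resp = pm.request(
        "GET",
        "http://example.org/old-path",
        headers={"Authorization": "Bearer placeholder"},
        # follow at most two redirects; strip Authorization on cross-host hops
        retries=Retry(redirect=2, remove_headers_on_redirect=["Authorization"]),
    )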
+ redirect_location = urljoin(url, redirect_location) + + # RFC 7231, Section 6.4.4 + if response.status == 303: + method = "GET" + + retries = kw.get("retries") + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + # Strip headers marked as unsafe to forward to the redirected location. + # Check remove_headers_on_redirect to avoid a potential network call within + # conn.is_same_host() which may use socket.gethostbyname() in the future. + if retries.remove_headers_on_redirect and not conn.is_same_host( + redirect_location + ): + headers = list(six.iterkeys(kw["headers"])) + for header in headers: + if header.lower() in retries.remove_headers_on_redirect: + kw["headers"].pop(header, None) + + try: + retries = retries.increment(method, url, response=response, _pool=conn) + except MaxRetryError: + if retries.raise_on_redirect: + response.drain_conn() + raise + return response + + kw["retries"] = retries + kw["redirect"] = redirect + + log.info("Redirecting %s -> %s", url, redirect_location) + + response.drain_conn() + return self.urlopen(method, redirect_location, **kw) + + +class ProxyManager(PoolManager): + """ + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param proxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary containing headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. + + :param proxy_ssl_context: + The proxy SSL context is used to establish the TLS connection to the + proxy when using HTTPS proxies. + + :param use_forwarding_for_https: + (Defaults to False) If set to True will forward requests to the HTTPS + proxy to be made on behalf of the client instead of creating a TLS + tunnel via the CONNECT method. **Enabling this flag means that request + and response headers and content will be visible from the HTTPS proxy** + whereas tunneling keeps request and response headers and content + private. IP address, target hostname, SNI, and port are always visible + to an HTTPS proxy even when this flag is disabled. 
+ + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + + """ + + def __init__( + self, + proxy_url, + num_pools=10, + headers=None, + proxy_headers=None, + proxy_ssl_context=None, + use_forwarding_for_https=False, + **connection_pool_kw + ): + + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = "%s://%s:%i" % ( + proxy_url.scheme, + proxy_url.host, + proxy_url.port, + ) + proxy = parse_url(proxy_url) + + if proxy.scheme not in ("http", "https"): + raise ProxySchemeUnknown(proxy.scheme) + + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + self.proxy_ssl_context = proxy_ssl_context + self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https) + + connection_pool_kw["_proxy"] = self.proxy + connection_pool_kw["_proxy_headers"] = self.proxy_headers + connection_pool_kw["_proxy_config"] = self.proxy_config + + super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme, pool_kwargs=pool_kwargs + ) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs + ) + + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. + """ + headers_ = {"Accept": "*/*"} + + netloc = parse_url(url).netloc + if netloc: + headers_["Host"] = netloc + + if headers: + headers_.update(headers) + return headers_ + + def urlopen(self, method, url, redirect=True, **kw): + "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." + u = parse_url(url) + if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme): + # For connections using HTTP CONNECT, httplib sets the necessary + # headers on the CONNECT to the proxy. If we're not using CONNECT, + # we'll definitely need to set 'Host' at the very least. + headers = kw.get("headers", self.headers) + kw["headers"] = self._set_proxy_headers(url, headers) + + return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) + + +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/request.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/request.py new file mode 100644 index 0000000000000000000000000000000000000000..398386a5b9f61c13be314e256e671a37d28e3623 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/request.py @@ -0,0 +1,170 @@ +from __future__ import absolute_import + +from .filepost import encode_multipart_formdata +from .packages.six.moves.urllib.parse import urlencode + +__all__ = ["RequestMethods"] + + +class RequestMethods(object): + """ + Convenience mixin for classes who implement a :meth:`urlopen` method, such + as :class:`urllib3.HTTPConnectionPool` and + :class:`urllib3.PoolManager`. 
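A sketch of ``ProxyManager`` usage with proxy authentication; the proxy address and credentials are placeholders::

    import urllib3

    proxy = urllib3.ProxyManager(
        "http://localhost:3128/",
        proxy_headers=urllib3.make_headers(proxy_basic_auth="user:passwd"),
    )
    r1 = proxy.request("GET", "http://example.org/")    # forwarded with absolute URL
    r2 = proxy.request("GET", "https://example.org/")   # tunneled via CONNECT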
+ + Provides behavior for making common types of HTTP request methods and + decides which type of request field encoding to use. + + Specifically, + + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). + + :meth:`.request_encode_body` is for sending requests whose fields are + encoded in the *body* of the request using multipart or www-form-urlencoded + (such as for POST, PUT, PATCH). + + :meth:`.request` is for making any kind of request, it will look up the + appropriate encoding format and use one of the above two methods to make + the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + """ + + _encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"} + + def __init__(self, headers=None): + self.headers = headers or {} + + def urlopen( + self, + method, + url, + body=None, + headers=None, + encode_multipart=True, + multipart_boundary=None, + **kw + ): # Abstract + raise NotImplementedError( + "Classes extending RequestMethods must implement " + "their own ``urlopen`` method." + ) + + def request(self, method, url, fields=None, headers=None, **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the appropriate encoding of + ``fields`` based on the ``method`` used. + + This is a convenience method that requires the least amount of manual + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as + :meth:`request_encode_url`, :meth:`request_encode_body`, + or even the lowest level :meth:`urlopen`. + """ + method = method.upper() + + urlopen_kw["request_url"] = url + + if method in self._encode_url_methods: + return self.request_encode_url( + method, url, fields=fields, headers=headers, **urlopen_kw + ) + else: + return self.request_encode_body( + method, url, fields=fields, headers=headers, **urlopen_kw + ) + + def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the url. This is useful for request methods like GET, HEAD, DELETE, etc. + """ + if headers is None: + headers = self.headers + + extra_kw = {"headers": headers} + extra_kw.update(urlopen_kw) + + if fields: + url += "?" + urlencode(fields) + + return self.urlopen(method, url, **extra_kw) + + def request_encode_body( + self, + method, + url, + fields=None, + headers=None, + encode_multipart=True, + multipart_boundary=None, + **urlopen_kw + ): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the body. This is useful for request methods like POST, PUT, PATCH, etc. + + When ``encode_multipart=True`` (default), then + :func:`urllib3.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise + :func:`urllib.parse.urlencode` is used with the + 'application/x-www-form-urlencoded' content type. + + Multipart encoding must be used when posting files, and it's reasonably + safe to use it in other times too. However, it may break request + signing, such as with OAuth. + + Supports an optional ``fields`` parameter of key/value strings AND + key/filetuple. A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. 
For example:: + + fields = { + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + } + + When uploading a file, providing a filename (the first parameter of the + tuple) is optional but recommended to best mimic behavior of browsers. + + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string + which is used to compose the body of the request. The random boundary + string can be explicitly set with the ``multipart_boundary`` parameter. + """ + if headers is None: + headers = self.headers + + extra_kw = {"headers": {}} + + if fields: + if "body" in urlopen_kw: + raise TypeError( + "request got values for both 'fields' and 'body', can only specify one." + ) + + if encode_multipart: + body, content_type = encode_multipart_formdata( + fields, boundary=multipart_boundary + ) + else: + body, content_type = ( + urlencode(fields), + "application/x-www-form-urlencoded", + ) + + extra_kw["body"] = body + extra_kw["headers"] = {"Content-Type": content_type} + + extra_kw["headers"].update(headers) + extra_kw.update(urlopen_kw) + + return self.urlopen(method, url, **extra_kw) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/response.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/response.py new file mode 100644 index 0000000000000000000000000000000000000000..fdb50ddb2fb389c3c7207663a4617075444caef0 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3/response.py @@ -0,0 +1,824 @@ +from __future__ import absolute_import + +import io +import logging +import zlib +from contextlib import contextmanager +from socket import error as SocketError +from socket import timeout as SocketTimeout + +try: + try: + import brotlicffi as brotli + except ImportError: + import brotli +except ImportError: + brotli = None + +from ._collections import HTTPHeaderDict +from .connection import BaseSSLError, HTTPException +from .exceptions import ( + BodyNotHttplibCompatible, + DecodeError, + HTTPError, + IncompleteRead, + InvalidChunkLength, + InvalidHeader, + ProtocolError, + ReadTimeoutError, + ResponseNotChunked, + SSLError, +) +from .packages import six +from .util.response import is_fp_closed, is_response_to_head + +log = logging.getLogger(__name__) + + +class DeflateDecoder(object): + def __init__(self): + self._first_try = True + self._data = b"" + self._obj = zlib.decompressobj() + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + + if not self._first_try: + return self._obj.decompress(data) + + self._data += data + try: + decompressed = self._obj.decompress(data) + if decompressed: + self._first_try = False + self._data = None + return decompressed + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None + + +class GzipDecoderState(object): + + FIRST_MEMBER = 0 + OTHER_MEMBERS = 1 + SWALLOW_DATA = 2 + + +class GzipDecoder(object): + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + self._state = GzipDecoderState.FIRST_MEMBER + + def __getattr__(self, name): + return getattr(self._obj, name) + + def 
decompress(self, data): + ret = bytearray() + if self._state == GzipDecoderState.SWALLOW_DATA or not data: + return bytes(ret) + while True: + try: + ret += self._obj.decompress(data) + except zlib.error: + previous_state = self._state + # Ignore data after the first error + self._state = GzipDecoderState.SWALLOW_DATA + if previous_state == GzipDecoderState.OTHER_MEMBERS: + # Allow trailing garbage acceptable in other gzip clients + return bytes(ret) + raise + data = self._obj.unused_data + if not data: + return bytes(ret) + self._state = GzipDecoderState.OTHER_MEMBERS + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + + +if brotli is not None: + + class BrotliDecoder(object): + # Supports both 'brotlipy' and 'Brotli' packages + # since they share an import name. The top branches + # are for 'brotlipy' and bottom branches for 'Brotli' + def __init__(self): + self._obj = brotli.Decompressor() + if hasattr(self._obj, "decompress"): + self.decompress = self._obj.decompress + else: + self.decompress = self._obj.process + + def flush(self): + if hasattr(self._obj, "flush"): + return self._obj.flush() + return b"" + + +class MultiDecoder(object): + """ + From RFC7231: + If one or more encodings have been applied to a representation, the + sender that applied the encodings MUST generate a Content-Encoding + header field that lists the content codings in the order in which + they were applied. + """ + + def __init__(self, modes): + self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")] + + def flush(self): + return self._decoders[0].flush() + + def decompress(self, data): + for d in reversed(self._decoders): + data = d.decompress(data) + return data + + +def _get_decoder(mode): + if "," in mode: + return MultiDecoder(mode) + + if mode == "gzip": + return GzipDecoder() + + if brotli is not None and mode == "br": + return BrotliDecoder() + + return DeflateDecoder() + + +class HTTPResponse(io.IOBase): + """ + HTTP Response container. + + Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is + loaded and decoded on-demand when the ``data`` property is accessed. This + class is also compatible with the Python standard library's :mod:`io` + module, and can hence be treated as a readable object in the context of that + framework. + + Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: + + :param preload_content: + If True, the response's body will be preloaded during construction. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param original_response: + When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` + object, it's convenient to include the original for debug purposes. It's + otherwise unused. + + :param retries: + The retries contains the last :class:`~urllib3.util.retry.Retry` that + was used during the request. + + :param enforce_content_length: + Enforce content length checking. Body returned by server must match + value of Content-Length header, if present. Otherwise, raise error. 
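A sketch of the deferred-loading behaviour described above: with ``preload_content=False`` the body stays on the socket until explicitly read; ``example.org`` is a placeholder::

    import urllib3

    http = urllib3.PoolManager()
    r = http.request("GET", "http://example.org/", preload_content=False)
    total = 0
    for chunk in r.stream(1024, decode_content=True):
        total += len(chunk)        # consume the body incrementally
    r.release_conn()               # return the socket to the pool
    print(r.status, total)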
+ """ + + CONTENT_DECODERS = ["gzip", "deflate"] + if brotli is not None: + CONTENT_DECODERS += ["br"] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] + + def __init__( + self, + body="", + headers=None, + status=0, + version=0, + reason=None, + strict=0, + preload_content=True, + decode_content=True, + original_response=None, + pool=None, + connection=None, + msg=None, + retries=None, + enforce_content_length=False, + request_method=None, + request_url=None, + auto_close=True, + ): + + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) + self.status = status + self.version = version + self.reason = reason + self.strict = strict + self.decode_content = decode_content + self.retries = retries + self.enforce_content_length = enforce_content_length + self.auto_close = auto_close + + self._decoder = None + self._body = None + self._fp = None + self._original_response = original_response + self._fp_bytes_read = 0 + self.msg = msg + self._request_url = request_url + + if body and isinstance(body, (six.string_types, bytes)): + self._body = body + + self._pool = pool + self._connection = connection + + if hasattr(body, "read"): + self._fp = body + + # Are we using the chunked-style of transfer encoding? + self.chunked = False + self.chunk_left = None + tr_enc = self.headers.get("transfer-encoding", "").lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: + self.chunked = True + + # Determine length of response + self.length_remaining = self._init_length(request_method) + + # If requested, preload the body. + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) + + def get_redirect_location(self): + """ + Should we redirect and where to? + + :returns: Truthy redirect location string if we got a redirect status + code and valid location. ``None`` if redirect status and no + location. ``False`` if not a redirect status code. + """ + if self.status in self.REDIRECT_STATUSES: + return self.headers.get("location") + + return False + + def release_conn(self): + if not self._pool or not self._connection: + return + + self._pool._put_conn(self._connection) + self._connection = None + + def drain_conn(self): + """ + Read and discard any remaining HTTP response data in the response connection. + + Unread data in the HTTPResponse connection blocks the connection from being released back to the pool. + """ + try: + self.read() + except (HTTPError, SocketError, BaseSSLError, HTTPException): + pass + + @property + def data(self): + # For backwards-compat with earlier urllib3 0.4 and earlier. + if self._body: + return self._body + + if self._fp: + return self.read(cache_content=True) + + @property + def connection(self): + return self._connection + + def isclosed(self): + return is_fp_closed(self._fp) + + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``urllib3.response.HTTPResponse.read`` + if bytes are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + + def _init_length(self, request_method): + """ + Set initial length value for Response content if available. + """ + length = self.headers.get("content-length") + + if length is not None: + if self.chunked: + # This Response will fail with an IncompleteRead if it can't be + # received as chunked. 
This method falls back to attempt reading + # the response before raising an exception. + log.warning( + "Received response with both Content-Length and " + "Transfer-Encoding set. This is expressly forbidden " + "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " + "attempting to process response as Transfer-Encoding: " + "chunked." + ) + return None + + try: + # RFC 7230 section 3.3.2 specifies multiple content lengths can + # be sent in a single Content-Length header + # (e.g. Content-Length: 42, 42). This line ensures the values + # are all valid ints and that as long as the `set` length is 1, + # all values are the same. Otherwise, the header is invalid. + lengths = set([int(val) for val in length.split(",")]) + if len(lengths) > 1: + raise InvalidHeader( + "Content-Length contained multiple " + "unmatching values (%s)" % length + ) + length = lengths.pop() + except ValueError: + length = None + else: + if length < 0: + length = None + + # Convert status to int for comparison + # In some cases, httplib returns a status of "_UNKNOWN" + try: + status = int(self.status) + except ValueError: + status = 0 + + # Check for responses that shouldn't include a body + if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": + length = 0 + + return length + + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessary. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get("content-encoding", "").lower() + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + elif "," in content_encoding: + encodings = [ + e.strip() + for e in content_encoding.split(",") + if e.strip() in self.CONTENT_DECODERS + ] + if len(encodings): + self._decoder = _get_decoder(content_encoding) + + DECODER_ERROR_CLASSES = (IOError, zlib.error) + if brotli is not None: + DECODER_ERROR_CLASSES += (brotli.error,) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + if not decode_content: + return data + + try: + if self._decoder: + data = self._decoder.decompress(data) + except self.DECODER_ERROR_CLASSES as e: + content_encoding = self.headers.get("content-encoding", "").lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e, + ) + if flush_decoder: + data += self._flush_decoder() + + return data + + def _flush_decoder(self): + """ + Flushes the decoder. Should only be called if the decoder is actually + being used. + """ + if self._decoder: + buf = self._decoder.decompress(b"") + return buf + self._decoder.flush() + + return b"" + + @contextmanager + def _error_catcher(self): + """ + Catch low-level python exceptions, instead re-raising urllib3 + variants, so that low-level exceptions are not leaked in the + high-level api. + + On exit, release the connection back to the pool. + """ + clean_exit = False + + try: + try: + yield + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, "Read timed out.") + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? 
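A round-trip sketch for the decoder classes above, using the module's (private) ``_get_decoder`` factory; the payload is arbitrary::

    import zlib
    from urllib3.response import _get_decoder

    payload = b"hello world" * 100
    co = zlib.compressobj(9, zlib.DEFLATED, 16 + zlib.MAX_WBITS)  # gzip framing
    gz = co.compress(payload) + co.flush()

    dec = _get_decoder("gzip")                    # returns GzipDecoder
    assert dec.decompress(gz) + dec.flush() == payload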
+ if "read operation timed out" not in str(e): + # SSL errors related to framing/MAC get wrapped and reraised here + raise SSLError(e) + + raise ReadTimeoutError(self._pool, None, "Read timed out.") + + except (HTTPException, SocketError) as e: + # This includes IncompleteRead. + raise ProtocolError("Connection broken: %r" % e, e) + + # If no exception is thrown, we should avoid cleaning up + # unnecessarily. + clean_exit = True + finally: + # If we didn't terminate cleanly, we need to throw away our + # connection. + if not clean_exit: + # The response may not be closed but we're not going to use it + # anymore so close it now to ensure that the connection is + # released back to the pool. + if self._original_response: + self._original_response.close() + + # Closing the response may not actually be sufficient to close + # everything, so if we have a hold of the connection close that + # too. + if self._connection: + self._connection.close() + + # If we hold the original response but it's closed now, we should + # return the connection back to the pool. + if self._original_response and self._original_response.isclosed(): + self.release_conn() + + def read(self, amt=None, decode_content=None, cache_content=False): + """ + Similar to :meth:`http.client.HTTPResponse.read`, but with two additional + parameters: ``decode_content`` and ``cache_content``. + + :param amt: + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param cache_content: + If True, will save the returned data such that the same result is + returned despite of the state of the underlying file object. This + is useful if you want the ``.data`` property to continue working + after having ``.read()`` the file object. (Overridden if ``amt`` is + set.) + """ + self._init_decoder() + if decode_content is None: + decode_content = self.decode_content + + if self._fp is None: + return + + flush_decoder = False + fp_closed = getattr(self._fp, "closed", False) + + with self._error_catcher(): + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() if not fp_closed else b"" + flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) if not fp_closed else b"" + if ( + amt != 0 and not data + ): # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + if self.enforce_content_length and self.length_remaining not in ( + 0, + None, + ): + # This is an edge case that httplib failed to cover due + # to concerns of backward compatibility. We're + # addressing it here to make sure IncompleteRead is + # raised during streaming, so all calls with incorrect + # Content-Length are caught. 
+ raise IncompleteRead(self._fp_bytes_read, self.length_remaining) + + if data: + self._fp_bytes_read += len(data) + if self.length_remaining is not None: + self.length_remaining -= len(data) + + data = self._decode(data, decode_content, flush_decoder) + + if cache_content: + self._body = data + + return data + + def stream(self, amt=2 ** 16, decode_content=None): + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + if self.chunked and self.supports_chunked_reads(): + for line in self.read_chunked(amt, decode_content=decode_content): + yield line + else: + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + + @classmethod + def from_httplib(ResponseCls, r, **response_kw): + """ + Given an :class:`http.client.HTTPResponse` instance ``r``, return a + corresponding :class:`urllib3.response.HTTPResponse` object. + + Remaining parameters are passed to the HTTPResponse constructor, along + with ``original_response=r``. + """ + headers = r.msg + + if not isinstance(headers, HTTPHeaderDict): + if six.PY2: + # Python 2.7 + headers = HTTPHeaderDict.from_httplib(headers) + else: + headers = HTTPHeaderDict(headers.items()) + + # HTTPResponse objects in Python 3 don't have a .strict attribute + strict = getattr(r, "strict", 0) + resp = ResponseCls( + body=r, + headers=headers, + status=r.status, + version=r.version, + reason=r.reason, + strict=strict, + original_response=r, + **response_kw + ) + return resp + + # Backwards-compatibility methods for http.client.HTTPResponse + def getheaders(self): + return self.headers + + def getheader(self, name, default=None): + return self.headers.get(name, default) + + # Backwards compatibility for http.cookiejar + def info(self): + return self.headers + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + + if self._connection: + self._connection.close() + + if not self.auto_close: + io.IOBase.close(self) + + @property + def closed(self): + if not self.auto_close: + return io.IOBase.closed.__get__(self) + elif self._fp is None: + return True + elif hasattr(self._fp, "isclosed"): + return self._fp.isclosed() + elif hasattr(self._fp, "closed"): + return self._fp.closed + else: + return True + + def fileno(self): + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + else: + raise IOError( + "The file-like object this HTTPResponse is wrapped " + "around has no file descriptor" + ) + + def flush(self): + if ( + self._fp is not None + and hasattr(self._fp, "flush") + and not getattr(self._fp, "closed", False) + ): + return self._fp.flush() + + def readable(self): + # This method is required for `io` module compatibility. + return True + + def readinto(self, b): + # This method is required for `io` module compatibility. 
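Because ``readable``/``readinto`` satisfy the ``io`` protocol, an unread response can be wrapped in the stdlib's buffered readers; a sketch with a placeholder URL::

    import io
    import urllib3

    http = urllib3.PoolManager()
    r = http.request("GET", "http://example.org/", preload_content=False)
    reader = io.BufferedReader(r, 8 * 1024)
    first_line = reader.readline()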
+        temp = self.read(len(b))
+        if len(temp) == 0:
+            return 0
+        else:
+            b[: len(temp)] = temp
+            return len(temp)
+
+    def supports_chunked_reads(self):
+        """
+        Checks if the underlying file-like object looks like a
+        :class:`http.client.HTTPResponse` object. We do this by testing for
+        the fp attribute. If it is present we assume it returns raw chunks as
+        processed by read_chunked().
+        """
+        return hasattr(self._fp, "fp")
+
+    def _update_chunk_length(self):
+        # First, we'll figure out the length of a chunk and then
+        # we'll try to read it from the socket.
+        if self.chunk_left is not None:
+            return
+        line = self._fp.fp.readline()
+        line = line.split(b";", 1)[0]
+        try:
+            self.chunk_left = int(line, 16)
+        except ValueError:
+            # Invalid chunked protocol response, abort.
+            self.close()
+            raise InvalidChunkLength(self, line)
+
+    def _handle_chunk(self, amt):
+        returned_chunk = None
+        if amt is None:
+            chunk = self._fp._safe_read(self.chunk_left)
+            returned_chunk = chunk
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+        elif amt < self.chunk_left:
+            value = self._fp._safe_read(amt)
+            self.chunk_left = self.chunk_left - amt
+            returned_chunk = value
+        elif amt == self.chunk_left:
+            value = self._fp._safe_read(amt)
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+            returned_chunk = value
+        else:  # amt > self.chunk_left
+            returned_chunk = self._fp._safe_read(self.chunk_left)
+            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
+            self.chunk_left = None
+        return returned_chunk
+
+    def read_chunked(self, amt=None, decode_content=None):
+        """
+        Similar to :meth:`HTTPResponse.read`, but with an additional
+        parameter: ``decode_content``.
+
+        :param amt:
+            How much of the content to read. If specified, caching is skipped
+            because it doesn't make sense to cache partial content as the full
+            response.
+
+        :param decode_content:
+            If True, will attempt to decode the body based on the
+            'content-encoding' header.
+        """
+        self._init_decoder()
+        # FIXME: Rewrite this method and make it a class with a better structured logic.
+        if not self.chunked:
+            raise ResponseNotChunked(
+                "Response is not chunked. "
+                "Header 'transfer-encoding: chunked' is missing."
+            )
+        if not self.supports_chunked_reads():
+            raise BodyNotHttplibCompatible(
+                "Body should be http.client.HTTPResponse like. "
+                "It should have an fp attribute which returns raw chunks."
+            )
+
+        with self._error_catcher():
+            # Don't bother reading the body of a HEAD request.
+            if self._original_response and is_response_to_head(self._original_response):
+                self._original_response.close()
+                return
+
+            # If a response is already read and closed
+            # then return immediately.
+            if self._fp.fp is None:
+                return
+
+            while True:
+                self._update_chunk_length()
+                if self.chunk_left == 0:
+                    break
+                chunk = self._handle_chunk(amt)
+                decoded = self._decode(
+                    chunk, decode_content=decode_content, flush_decoder=False
+                )
+                if decoded:
+                    yield decoded
+
+            if decode_content:
+                # On CPython and PyPy, we should never need to flush the
+                # decoder. However, on Jython we *might* need to, so
+                # let's defensively do it anyway.
+                decoded = self._flush_decoder()
+                if decoded:  # Platform-specific: Jython.
+                    yield decoded
+
+            # Chunk content ends with \r\n: discard it.
+            while True:
+                line = self._fp.fp.readline()
+                if not line:
+                    # Some sites may not end with '\r\n'.
+                    break
+                if line == b"\r\n":
+                    break
+
+            # We read everything; close the "file".
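+            # Closing the original response lets _error_catcher's cleanup
+            # (its isclosed() check in the finally block) release the
+            # underlying connection back to the pool on exit.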
+ if self._original_response: + self._original_response.close() + + def geturl(self): + """ + Returns the URL that was the source of this response. + If the request that generated this response redirected, this method + will return the final redirect location. + """ + if self.retries is not None and len(self.retries.history): + return self.retries.history[-1].redirect_location + else: + return self._request_url + + def __iter__(self): + buffer = [] + for chunk in self.stream(decode_content=True): + if b"\n" in chunk: + chunk = chunk.split(b"\n") + yield b"".join(buffer) + chunk[0] + b"\n" + for x in chunk[1:-1]: + yield x + b"\n" + if chunk[-1]: + buffer = [chunk[-1]] + else: + buffer = [] + else: + buffer.append(chunk) + if buffer: + yield b"".join(buffer) diff --git a/tmp_inputs_32_24/case00008.nii.gz b/tmp_inputs_32_24/case00008.nii.gz new file mode 100644 index 0000000000000000000000000000000000000000..1fb87ec4eb08cac0616006556985b05291bdde70 --- /dev/null +++ b/tmp_inputs_32_24/case00008.nii.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b88697e4061840dd8f7ea37887cf7e8d5c136632171a769f1cba364b940c36 +size 35307358 diff --git a/tmp_inputs_4_3/case00001.nii.gz b/tmp_inputs_4_3/case00001.nii.gz new file mode 100644 index 0000000000000000000000000000000000000000..2ca681977b0383af05426316feea4955f4b8afcd --- /dev/null +++ b/tmp_inputs_4_3/case00001.nii.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ca9c0fdd6236b1a0e40e9aeb4968d05ab3986be4ec3f2f4ca2ae4dc9703a66d +size 32594383