Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/imageio/resources/images/stent.npz +3 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/__init__.py +56 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/base.py +368 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/device_stats_monitor.py +104 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/early_stopping.py +261 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/finetuning.py +417 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/gpu_stats_monitor.py +262 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/lambda_function.py +96 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/lr_monitor.py +354 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py +720 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_summary.py +73 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/prediction_writer.py +119 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/pruning.py +486 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/quantization.py +344 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/rich_model_summary.py +109 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/stochastic_weight_avg.py +280 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/timer.py +176 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/xla_stats_monitor.py +114 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/datamodule.py +264 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/decorators.py +60 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/hooks.py +828 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py +409 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/saving.py +419 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/distributed/__init__.py +14 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/distributed/dist.py +47 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/__pycache__/__init__.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/__pycache__/layer_sync.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/__init__.py +20 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/__pycache__/__init__.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/__pycache__/bagua_environment.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/bagua_environment.py +62 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/cluster_environment.py +87 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/kubeflow_environment.py +78 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/lightning_environment.py +101 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/lsf_environment.py +190 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/slurm_environment.py +134 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/torchelastic_environment.py +88 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/__init__.py +17 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/__pycache__/xla_plugin.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/checkpoint_plugin.py +62 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/hpu_plugin.py +52 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/torch_plugin.py +96 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/xla_plugin.py +57 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__init__.py +27 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/apex_amp.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/deepspeed.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/double.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/fully_sharded_native_amp.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/mixed.cpython-38.pyc +0 -0
- my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/native_amp.cpython-38.pyc +0 -0
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/imageio/resources/images/stent.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60a83d2296b51ee6a53153e9ba96ba9020391b0c8952895d9d60a0a629ac6bb6
+size 824612
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/__init__.py
ADDED
@@ -0,0 +1,56 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pytorch_lightning.callbacks.base import Callback
+from pytorch_lightning.callbacks.device_stats_monitor import DeviceStatsMonitor
+from pytorch_lightning.callbacks.early_stopping import EarlyStopping
+from pytorch_lightning.callbacks.finetuning import BackboneFinetuning, BaseFinetuning
+from pytorch_lightning.callbacks.gpu_stats_monitor import GPUStatsMonitor
+from pytorch_lightning.callbacks.gradient_accumulation_scheduler import GradientAccumulationScheduler
+from pytorch_lightning.callbacks.lambda_function import LambdaCallback
+from pytorch_lightning.callbacks.lr_monitor import LearningRateMonitor
+from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
+from pytorch_lightning.callbacks.model_summary import ModelSummary
+from pytorch_lightning.callbacks.prediction_writer import BasePredictionWriter
+from pytorch_lightning.callbacks.progress import ProgressBar, ProgressBarBase, RichProgressBar, TQDMProgressBar
+from pytorch_lightning.callbacks.pruning import ModelPruning
+from pytorch_lightning.callbacks.quantization import QuantizationAwareTraining
+from pytorch_lightning.callbacks.rich_model_summary import RichModelSummary
+from pytorch_lightning.callbacks.stochastic_weight_avg import StochasticWeightAveraging
+from pytorch_lightning.callbacks.timer import Timer
+from pytorch_lightning.callbacks.xla_stats_monitor import XLAStatsMonitor
+
+__all__ = [
+    "BackboneFinetuning",
+    "BaseFinetuning",
+    "Callback",
+    "DeviceStatsMonitor",
+    "EarlyStopping",
+    "GPUStatsMonitor",
+    "XLAStatsMonitor",
+    "GradientAccumulationScheduler",
+    "LambdaCallback",
+    "LearningRateMonitor",
+    "ModelCheckpoint",
+    "ModelPruning",
+    "ModelSummary",
+    "BasePredictionWriter",
+    "ProgressBar",
+    "ProgressBarBase",
+    "QuantizationAwareTraining",
+    "RichModelSummary",
+    "RichProgressBar",
+    "StochasticWeightAveraging",
+    "Timer",
+    "TQDMProgressBar",
+]
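
The __init__ module above simply re-exports every callback class, so downstream code imports them from pytorch_lightning.callbacks directly. A minimal usage sketch under that assumption (the model class is a hypothetical placeholder, not part of this diff):

# Sketch: wiring a few of the re-exported callbacks into a Trainer.
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint

trainer = Trainer(
    max_epochs=10,
    callbacks=[
        ModelCheckpoint(monitor="val_loss", mode="min"),  # keep the best checkpoint by val_loss
        EarlyStopping(monitor="val_loss", patience=3),    # stop once val_loss plateaus
        LearningRateMonitor(logging_interval="epoch"),    # log learning-rate schedules
    ],
)
# trainer.fit(MyLightningModule())  # hypothetical LightningModule
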
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/base.py
ADDED
@@ -0,0 +1,368 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""
+Base class used to build new callbacks.
+
+"""
+
+from typing import Any, Dict, List, Optional, Type
+
+import torch
+from torch.optim import Optimizer
+
+import pytorch_lightning as pl
+from pytorch_lightning.utilities.types import STEP_OUTPUT
+
+
+class Callback:
+    r"""
+    Abstract base class used to build new callbacks.
+
+    Subclass this class and override any of the relevant hooks
+    """
+
+    @property
+    def state_key(self) -> str:
+        """Identifier for the state of the callback.
+
+        Used to store and retrieve a callback's state from the checkpoint dictionary by
+        ``checkpoint["callbacks"][state_key]``. Implementations of a callback need to provide a unique state key if 1)
+        the callback has state and 2) it is desired to maintain the state of multiple instances of that callback.
+        """
+        return self.__class__.__qualname__
+
+    @property
+    def _legacy_state_key(self) -> Type["Callback"]:
+        """State key for checkpoints saved prior to version 1.5.0."""
+        return type(self)
+
+    def _generate_state_key(self, **kwargs: Any) -> str:
+        """Formats a set of key-value pairs into a state key string with the callback class name prefixed. Useful
+        for defining a :attr:`state_key`.
+
+        Args:
+            **kwargs: A set of key-value pairs. Must be serializable to :class:`str`.
+        """
+        return f"{self.__class__.__qualname__}{repr(kwargs)}"
+
+    def on_configure_sharded_model(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.6
+            This callback hook was deprecated in v1.6 and will be removed in v1.8. Use `setup()` instead.
+
+        Called before configure sharded model.
+        """
+
+    def on_before_accelerator_backend_setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.6
+            This callback hook was deprecated in v1.6 and will be removed in v1.8. Use ``setup()`` instead.
+
+        Called before accelerator is being setup.
+        """
+
+    def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
+        """Called when fit, validate, test, predict, or tune begins."""
+
+    def teardown(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
+        """Called when fit, validate, test, predict, or tune ends."""
+
+    def on_init_start(self, trainer: "pl.Trainer") -> None:
+        r"""
+        .. deprecated:: v1.6
+            This callback hook was deprecated in v1.6 and will be removed in v1.8.
+
+        Called when the trainer initialization begins, model has not yet been set.
+        """
+
+    def on_init_end(self, trainer: "pl.Trainer") -> None:
+        r"""
+        .. deprecated:: v1.6
+            This callback hook was deprecated in v1.6 and will be removed in v1.8.
+
+        Called when the trainer initialization ends, model has not yet been set.
+        """
+
+    def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when fit begins."""
+
+    def on_fit_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when fit ends."""
+
+    def on_sanity_check_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the validation sanity check starts."""
+
+    def on_sanity_check_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the validation sanity check ends."""
+
+    def on_train_batch_start(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        batch: Any,
+        batch_idx: int,
+        unused: int = 0,
+    ) -> None:
+        """Called when the train batch begins."""
+
+    def on_train_batch_end(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        outputs: STEP_OUTPUT,
+        batch: Any,
+        batch_idx: int,
+        unused: int = 0,
+    ) -> None:
+        """Called when the train batch ends."""
+
+    def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the train epoch begins."""
+
+    def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the train epoch ends.
+
+        To access all batch outputs at the end of the epoch, either:
+
+        1. Implement `training_epoch_end` in the `LightningModule` and access outputs via the module OR
+        2. Cache data across train batch hooks inside the callback implementation to post-process in this hook.
+        """
+
+    def on_validation_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the val epoch begins."""
+
+    def on_validation_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the val epoch ends."""
+
+    def on_test_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the test epoch begins."""
+
+    def on_test_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the test epoch ends."""
+
+    def on_predict_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the predict epoch begins."""
+
+    def on_predict_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", outputs: List[Any]) -> None:
+        """Called when the predict epoch ends."""
+
+    def on_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.6
+            This callback hook was deprecated in v1.6 and will be removed in v1.8. Use
+            ``on_<train/validation/test>_epoch_start`` instead.
+
+        Called when either of train/val/test epoch begins.
+        """
+
+    def on_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.6
+            This callback hook was deprecated in v1.6 and will be removed in v1.8. Use
+            ``on_<train/validation/test>_epoch_end`` instead.
+
+        Called when either of train/val/test epoch ends.
+        """
+
+    def on_batch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.6
+            This callback hook was deprecated in v1.6 and will be removed in v1.8. Use
+            ``on_train_batch_start`` instead.
+
+        Called when the training batch begins.
+        """
+
+    def on_batch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.6
+            This callback hook was deprecated in v1.6 and will be removed in v1.8. Use
+            ``on_train_batch_end`` instead.
+
+        Called when the training batch ends.
+        """
+
+    def on_validation_batch_start(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", batch: Any, batch_idx: int, dataloader_idx: int
+    ) -> None:
+        """Called when the validation batch begins."""
+
+    def on_validation_batch_end(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        outputs: Optional[STEP_OUTPUT],
+        batch: Any,
+        batch_idx: int,
+        dataloader_idx: int,
+    ) -> None:
+        """Called when the validation batch ends."""
+
+    def on_test_batch_start(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", batch: Any, batch_idx: int, dataloader_idx: int
+    ) -> None:
+        """Called when the test batch begins."""
+
+    def on_test_batch_end(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        outputs: Optional[STEP_OUTPUT],
+        batch: Any,
+        batch_idx: int,
+        dataloader_idx: int,
+    ) -> None:
+        """Called when the test batch ends."""
+
+    def on_predict_batch_start(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", batch: Any, batch_idx: int, dataloader_idx: int
+    ) -> None:
+        """Called when the predict batch begins."""
+
+    def on_predict_batch_end(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        outputs: Any,
+        batch: Any,
+        batch_idx: int,
+        dataloader_idx: int,
+    ) -> None:
+        """Called when the predict batch ends."""
+
+    def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the train begins."""
+
+    def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the train ends."""
+
+    def on_pretrain_routine_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.6
+
+            This callback hook was deprecated in v1.6 and will be removed in v1.8. Use ``on_fit_start`` instead.
+
+        Called when the pretrain routine begins.
+        """
+
+    def on_pretrain_routine_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.6
+
+            This callback hook was deprecated in v1.6 and will be removed in v1.8. Use ``on_fit_start`` instead.
+
+        Called when the pretrain routine ends.
+        """
+
+    def on_validation_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the validation loop begins."""
+
+    def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the validation loop ends."""
+
+    def on_test_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the test begins."""
+
+    def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the test ends."""
+
+    def on_predict_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when the predict begins."""
+
+    def on_predict_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called when predict ends."""
+
+    def on_keyboard_interrupt(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        r"""
+        .. deprecated:: v1.5
+            This callback hook was deprecated in v1.5 in favor of `on_exception` and will be removed in v1.7.
+
+        Called when any trainer execution is interrupted by KeyboardInterrupt.
+        """
+
+    def on_exception(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", exception: BaseException) -> None:
+        """Called when any trainer execution is interrupted by an exception."""
+
+    def state_dict(self) -> Dict[str, Any]:
+        """Called when saving a checkpoint, implement to generate callback's ``state_dict``.
+
+        Returns:
+            A dictionary containing callback state.
+        """
+        return {}
+
+    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
+        """Called when loading a checkpoint, implement to reload callback state given callback's ``state_dict``.
+
+        Args:
+            state_dict: the callback state returned by ``state_dict``.
+        """
+        pass
+
+    def on_save_checkpoint(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", checkpoint: Dict[str, Any]
+    ) -> Optional[dict]:
+        r"""
+        Called when saving a checkpoint to give you a chance to store anything else you might want to save.
+
+        Args:
+            trainer: the current :class:`~pytorch_lightning.trainer.Trainer` instance.
+            pl_module: the current :class:`~pytorch_lightning.core.lightning.LightningModule` instance.
+            checkpoint: the checkpoint dictionary that will be saved.
+
+        Returns:
+            None or the callback state. Support for returning callback state will be removed in v1.8.
+
+        .. deprecated:: v1.6
+            Returning a value from this method was deprecated in v1.6 and will be removed in v1.8.
+            Implement ``Callback.state_dict`` instead to return state.
+            In v1.8 ``Callback.on_save_checkpoint`` can only return None.
+        """
+
+    def on_load_checkpoint(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", callback_state: Dict[str, Any]
+    ) -> None:
+        r"""
+        Called when loading a model checkpoint, use to reload state.
+
+        Args:
+            trainer: the current :class:`~pytorch_lightning.trainer.Trainer` instance.
+            pl_module: the current :class:`~pytorch_lightning.core.lightning.LightningModule` instance.
+            callback_state: the callback state returned by ``on_save_checkpoint``.
+
+        Note:
+            The ``on_load_checkpoint`` won't be called with an undefined state.
+            If your ``on_load_checkpoint`` hook behavior doesn't rely on a state,
+            you will still need to override ``on_save_checkpoint`` to return a ``dummy state``.
+
+        .. deprecated:: v1.6
+            This callback hook will change its signature and behavior in v1.8.
+            If you wish to load the state of the callback, use ``Callback.load_state_dict`` instead.
+            In v1.8 ``Callback.on_load_checkpoint(checkpoint)`` will receive the entire loaded
+            checkpoint dictionary instead of only the callback state from the checkpoint.
+        """
+
+    def on_before_backward(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", loss: torch.Tensor) -> None:
+        """Called before ``loss.backward()``."""
+
+    def on_after_backward(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Called after ``loss.backward()`` and before optimizers are stepped."""
+
+    def on_before_optimizer_step(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", optimizer: Optimizer, opt_idx: int
+    ) -> None:
+        """Called before ``optimizer.step()``."""
+
+    def on_before_zero_grad(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", optimizer: Optimizer) -> None:
+        """Called before ``optimizer.zero_grad()``."""
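
The Callback base class above consists entirely of no-op hooks plus the state_dict/load_state_dict pair used for checkpointing. A minimal stateful subclass sketch based on those signatures (the counter itself is illustrative, not part of this diff):

from typing import Any, Dict

from pytorch_lightning.callbacks import Callback


class BatchCounter(Callback):
    """Counts train batches and persists the count across checkpoint save/load."""

    def __init__(self) -> None:
        self.batches_seen = 0

    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, unused=0) -> None:
        # Cache data across batch hooks, as suggested by the on_train_epoch_end docstring.
        self.batches_seen += 1

    def state_dict(self) -> Dict[str, Any]:
        # Stored under checkpoint["callbacks"][self.state_key].
        return {"batches_seen": self.batches_seen}

    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
        self.batches_seen = state_dict["batches_seen"]
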
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/device_stats_monitor.py
ADDED
@@ -0,0 +1,104 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Device Stats Monitor
+====================
+
+Monitors and logs device stats during training.
+
+"""
+from typing import Any, Dict, Optional
+
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks.base import Callback
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.types import STEP_OUTPUT
+from pytorch_lightning.utilities.warnings import rank_zero_deprecation
+
+
+class DeviceStatsMonitor(Callback):
+    r"""
+    Automatically monitors and logs device stats during training stage. ``DeviceStatsMonitor``
+    is a special callback as it requires a ``logger`` to passed as argument to the ``Trainer``.
+
+    Raises:
+        MisconfigurationException:
+            If ``Trainer`` has no logger.
+
+    Example:
+        >>> from pytorch_lightning import Trainer
+        >>> from pytorch_lightning.callbacks import DeviceStatsMonitor
+        >>> device_stats = DeviceStatsMonitor()  # doctest: +SKIP
+        >>> trainer = Trainer(callbacks=[device_stats])  # doctest: +SKIP
+    """
+
+    def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
+        if not trainer.loggers:
+            raise MisconfigurationException("Cannot use DeviceStatsMonitor callback with Trainer that has no logger.")
+
+    def on_train_batch_start(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        batch: Any,
+        batch_idx: int,
+        unused: int = 0,
+    ) -> None:
+        if not trainer.loggers:
+            raise MisconfigurationException("Cannot use `DeviceStatsMonitor` callback with `Trainer(logger=False)`.")
+
+        if not trainer._logger_connector.should_update_logs:
+            return
+
+        device = trainer.strategy.root_device
+        device_stats = trainer.accelerator.get_device_stats(device)
+        for logger in trainer.loggers:
+            separator = logger.group_separator
+            prefixed_device_stats = _prefix_metric_keys(device_stats, "on_train_batch_start", separator)
+            logger.log_metrics(prefixed_device_stats, step=trainer.fit_loop.epoch_loop._batches_that_stepped)
+
+    def on_train_batch_end(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        outputs: STEP_OUTPUT,
+        batch: Any,
+        batch_idx: int,
+        unused: int = 0,
+    ) -> None:
+        if not trainer.loggers:
+            raise MisconfigurationException("Cannot use `DeviceStatsMonitor` callback with `Trainer(logger=False)`.")
+
+        if not trainer._logger_connector.should_update_logs:
+            return
+
+        device = trainer.strategy.root_device
+        device_stats = trainer.accelerator.get_device_stats(device)
+        for logger in trainer.loggers:
+            separator = logger.group_separator
+            prefixed_device_stats = _prefix_metric_keys(device_stats, "on_train_batch_end", separator)
+            logger.log_metrics(prefixed_device_stats, step=trainer.fit_loop.epoch_loop._batches_that_stepped)
+
+
+def _prefix_metric_keys(metrics_dict: Dict[str, float], prefix: str, separator: str) -> Dict[str, float]:
+    return {prefix + separator + k: v for k, v in metrics_dict.items()}
+
+
+def prefix_metric_keys(metrics_dict: Dict[str, float], prefix: str) -> Dict[str, float]:
+    rank_zero_deprecation(
+        "`pytorch_lightning.callbacks.device_stats_monitor.prefix_metrics`"
+        " is deprecated in v1.6 and will be removed in v1.8."
+    )
+    sep = ""
+    return _prefix_metric_keys(metrics_dict, prefix, sep)
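
Both batch hooks above funnel their statistics through the _prefix_metric_keys helper, which namespaces every entry with the hook name and the logger's group separator. A quick illustration with made-up values (the stats dictionary and separator are not part of this diff):

# Illustration only: a hypothetical stats dictionary and "/" separator.
stats = {"gpu_memory_used": 1024.0, "gpu_utilization": 57.0}
print(_prefix_metric_keys(stats, "on_train_batch_end", "/"))
# -> {'on_train_batch_end/gpu_memory_used': 1024.0, 'on_train_batch_end/gpu_utilization': 57.0}
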
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/early_stopping.py
ADDED
@@ -0,0 +1,261 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""
+Early Stopping
+^^^^^^^^^^^^^^
+
+Monitor a metric and stop training when it stops improving.
+
+"""
+import logging
+from typing import Any, Callable, Dict, Optional, Tuple
+
+import numpy as np
+import torch
+
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks.base import Callback
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.rank_zero import rank_zero_warn
+
+log = logging.getLogger(__name__)
+
+
+class EarlyStopping(Callback):
+    r"""
+    Monitor a metric and stop training when it stops improving.
+
+    Args:
+        monitor: quantity to be monitored.
+        min_delta: minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute
+            change of less than or equal to `min_delta`, will count as no improvement.
+        patience: number of checks with no improvement
+            after which training will be stopped. Under the default configuration, one check happens after
+            every training epoch. However, the frequency of validation can be modified by setting various parameters on
+            the ``Trainer``, for example ``check_val_every_n_epoch`` and ``val_check_interval``.
+
+            .. note::
+
+                It must be noted that the patience parameter counts the number of validation checks with
+                no improvement, and not the number of training epochs. Therefore, with parameters
+                ``check_val_every_n_epoch=10`` and ``patience=3``, the trainer will perform at least 40 training
+                epochs before being stopped.
+
+        verbose: verbosity mode.
+        mode: one of ``'min'``, ``'max'``. In ``'min'`` mode, training will stop when the quantity
+            monitored has stopped decreasing and in ``'max'`` mode it will stop when the quantity
+            monitored has stopped increasing.
+        strict: whether to crash the training if `monitor` is not found in the validation metrics.
+        check_finite: When set ``True``, stops training when the monitor becomes NaN or infinite.
+        stopping_threshold: Stop training immediately once the monitored quantity reaches this threshold.
+        divergence_threshold: Stop training as soon as the monitored quantity becomes worse than this threshold.
+        check_on_train_epoch_end: whether to run early stopping at the end of the training epoch.
+            If this is ``False``, then the check runs at the end of the validation.
+
+    Raises:
+        MisconfigurationException:
+            If ``mode`` is none of ``"min"`` or ``"max"``.
+        RuntimeError:
+            If the metric ``monitor`` is not available.
+
+    Example::
+
+        >>> from pytorch_lightning import Trainer
+        >>> from pytorch_lightning.callbacks import EarlyStopping
+        >>> early_stopping = EarlyStopping('val_loss')
+        >>> trainer = Trainer(callbacks=[early_stopping])
+
+    .. tip:: Saving and restoring multiple early stopping callbacks at the same time is supported under variation in the
+        following arguments:
+
+        *monitor, mode*
+
+        Read more: :ref:`Persisting Callback State`
+    """
+    mode_dict = {"min": torch.lt, "max": torch.gt}
+
+    order_dict = {"min": "<", "max": ">"}
+
+    def __init__(
+        self,
+        monitor: str,
+        min_delta: float = 0.0,
+        patience: int = 3,
+        verbose: bool = False,
+        mode: str = "min",
+        strict: bool = True,
+        check_finite: bool = True,
+        stopping_threshold: Optional[float] = None,
+        divergence_threshold: Optional[float] = None,
+        check_on_train_epoch_end: Optional[bool] = None,
+    ):
+        super().__init__()
+        self.monitor = monitor
+        self.min_delta = min_delta
+        self.patience = patience
+        self.verbose = verbose
+        self.mode = mode
+        self.strict = strict
+        self.check_finite = check_finite
+        self.stopping_threshold = stopping_threshold
+        self.divergence_threshold = divergence_threshold
+        self.wait_count = 0
+        self.stopped_epoch = 0
+        self._check_on_train_epoch_end = check_on_train_epoch_end
+
+        if self.mode not in self.mode_dict:
+            raise MisconfigurationException(f"`mode` can be {', '.join(self.mode_dict.keys())}, got {self.mode}")
+
+        self.min_delta *= 1 if self.monitor_op == torch.gt else -1
+        torch_inf = torch.tensor(np.Inf)
+        self.best_score = torch_inf if self.monitor_op == torch.lt else -torch_inf
+
+    @property
+    def state_key(self) -> str:
+        return self._generate_state_key(monitor=self.monitor, mode=self.mode)
+
+    def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
+        if self._check_on_train_epoch_end is None:
+            # if the user runs validation multiple times per training epoch or multiple training epochs without
+            # validation, then we run after validation instead of on train epoch end
+            self._check_on_train_epoch_end = trainer.val_check_interval == 1.0 and trainer.check_val_every_n_epoch == 1
+
+    def _validate_condition_metric(self, logs: Dict[str, float]) -> bool:
+        monitor_val = logs.get(self.monitor)
+
+        error_msg = (
+            f"Early stopping conditioned on metric `{self.monitor}` which is not available."
+            " Pass in or modify your `EarlyStopping` callback to use any of the following:"
+            f' `{"`, `".join(list(logs.keys()))}`'
+        )
+
+        if monitor_val is None:
+            if self.strict:
+                raise RuntimeError(error_msg)
+            if self.verbose > 0:
+                rank_zero_warn(error_msg, category=RuntimeWarning)
+
+            return False
+
+        return True
+
+    @property
+    def monitor_op(self) -> Callable:
+        return self.mode_dict[self.mode]
+
+    def state_dict(self) -> Dict[str, Any]:
+        return {
+            "wait_count": self.wait_count,
+            "stopped_epoch": self.stopped_epoch,
+            "best_score": self.best_score,
+            "patience": self.patience,
+        }
+
+    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
+        self.wait_count = state_dict["wait_count"]
+        self.stopped_epoch = state_dict["stopped_epoch"]
+        self.best_score = state_dict["best_score"]
+        self.patience = state_dict["patience"]
+
+    def _should_skip_check(self, trainer: "pl.Trainer") -> bool:
+        from pytorch_lightning.trainer.states import TrainerFn
+
+        return trainer.state.fn != TrainerFn.FITTING or trainer.sanity_checking
+
+    def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        if not self._check_on_train_epoch_end or self._should_skip_check(trainer):
+            return
+        self._run_early_stopping_check(trainer)
+
+    def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        if self._check_on_train_epoch_end or self._should_skip_check(trainer):
+            return
+        self._run_early_stopping_check(trainer)
+
+    def _run_early_stopping_check(self, trainer: "pl.Trainer") -> None:
+        """Checks whether the early stopping condition is met and if so tells the trainer to stop the training."""
+        logs = trainer.callback_metrics
+
+        if trainer.fast_dev_run or not self._validate_condition_metric(  # disable early_stopping with fast_dev_run
+            logs
+        ):  # short circuit if metric not present
+            return
+
+        current = logs[self.monitor].squeeze()
+        should_stop, reason = self._evaluate_stopping_criteria(current)
+
+        # stop every ddp process if any world process decides to stop
+        should_stop = trainer.strategy.reduce_boolean_decision(should_stop)
+        trainer.should_stop = trainer.should_stop or should_stop
+        if should_stop:
+            self.stopped_epoch = trainer.current_epoch
+        if reason and self.verbose:
+            self._log_info(trainer, reason)
+
+    def _evaluate_stopping_criteria(self, current: torch.Tensor) -> Tuple[bool, Optional[str]]:
+        should_stop = False
+        reason = None
+        if self.check_finite and not torch.isfinite(current):
+            should_stop = True
+            reason = (
+                f"Monitored metric {self.monitor} = {current} is not finite."
+                f" Previous best value was {self.best_score:.3f}. Signaling Trainer to stop."
+            )
+        elif self.stopping_threshold is not None and self.monitor_op(current, self.stopping_threshold):
+            should_stop = True
+            reason = (
+                "Stopping threshold reached:"
+                f" {self.monitor} = {current} {self.order_dict[self.mode]} {self.stopping_threshold}."
+                " Signaling Trainer to stop."
+            )
+        elif self.divergence_threshold is not None and self.monitor_op(-current, -self.divergence_threshold):
+            should_stop = True
+            reason = (
+                "Divergence threshold reached:"
+                f" {self.monitor} = {current} {self.order_dict[self.mode]} {self.divergence_threshold}."
+                " Signaling Trainer to stop."
+            )
+        elif self.monitor_op(current - self.min_delta, self.best_score.to(current.device)):
+            should_stop = False
+            reason = self._improvement_message(current)
+            self.best_score = current
+            self.wait_count = 0
+        else:
+            self.wait_count += 1
+            if self.wait_count >= self.patience:
+                should_stop = True
+                reason = (
+                    f"Monitored metric {self.monitor} did not improve in the last {self.wait_count} records."
+                    f" Best score: {self.best_score:.3f}. Signaling Trainer to stop."
+                )
+
+        return should_stop, reason
+
+    def _improvement_message(self, current: torch.Tensor) -> str:
+        """Formats a log message that informs the user about an improvement in the monitored score."""
+        if torch.isfinite(self.best_score):
+            msg = (
+                f"Metric {self.monitor} improved by {abs(self.best_score - current):.3f} >="
+                f" min_delta = {abs(self.min_delta)}. New best score: {current:.3f}"
+            )
+        else:
+            msg = f"Metric {self.monitor} improved. New best score: {current:.3f}"
+        return msg
+
+    @staticmethod
+    def _log_info(trainer: Optional["pl.Trainer"], message: str) -> None:
+        if trainer is not None and trainer.world_size > 1:
+            log.info(f"[rank: {trainer.global_rank}] {message}")
+        else:
+            log.info(message)
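
The stopping logic above checks, in order, non-finite values, the stopping threshold, the divergence threshold, and finally the patience counter. A configuration sketch exercising those knobs (the metric name is an assumption, not part of this diff):

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping

early_stop = EarlyStopping(
    monitor="val_accuracy",     # assumed metric logged by the LightningModule
    mode="max",                 # higher is better, so torch.gt becomes monitor_op
    patience=5,                 # tolerate 5 validation checks without improvement
    min_delta=0.001,            # smaller improvements do not reset the patience counter
    stopping_threshold=0.99,    # stop immediately once the metric reaches 0.99
    divergence_threshold=0.05,  # stop if the metric collapses below 0.05
)
trainer = Trainer(callbacks=[early_stop])
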
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/finetuning.py
ADDED
@@ -0,0 +1,417 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""
+Finetuning Callback
+^^^^^^^^^^^^^^^^^^^^
+Freeze and unfreeze models for finetuning purposes
+"""
+import logging
+from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Union
+
+import torch
+from torch.nn import Module, ModuleDict
+from torch.nn.modules.batchnorm import _BatchNorm
+from torch.optim.optimizer import Optimizer
+
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks.base import Callback
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.rank_zero import rank_zero_warn
+
+log = logging.getLogger(__name__)
+
+
+def multiplicative(epoch):
+    return 2
+
+
+class BaseFinetuning(Callback):
+    r"""
+    This class implements the base logic for writing your own Finetuning Callback.
+
+    Override ``freeze_before_training`` and ``finetune_function`` methods with your own logic.
+
+    ``freeze_before_training``: This method is called before ``configure_optimizers``
+        and should be used to freeze any modules parameters.
+
+    ``finetune_function``: This method is called on every train epoch start and should be used to
+        ``unfreeze`` any parameters. Those parameters needs to be added in a new ``param_group``
+        within the optimizer.
+
+    .. note:: Make sure to filter the parameters based on ``requires_grad``.
+
+    Example::
+
+        >>> from torch.optim import Adam
+        >>> class MyModel(pl.LightningModule):
+        ...     def configure_optimizer(self):
+        ...         # Make sure to filter the parameters based on `requires_grad`
+        ...         return Adam(filter(lambda p: p.requires_grad, self.parameters()))
+        ...
+        >>> class FeatureExtractorFreezeUnfreeze(BaseFinetuning):
+        ...     def __init__(self, unfreeze_at_epoch=10):
+        ...         super().__init__()
+        ...         self._unfreeze_at_epoch = unfreeze_at_epoch
+        ...
+        ...     def freeze_before_training(self, pl_module):
+        ...         # freeze any module you want
+        ...         # Here, we are freezing `feature_extractor`
+        ...         self.freeze(pl_module.feature_extractor)
+        ...
+        ...     def finetune_function(self, pl_module, current_epoch, optimizer, optimizer_idx):
+        ...         # When `current_epoch` is 10, feature_extractor will start training.
+        ...         if current_epoch == self._unfreeze_at_epoch:
+        ...             self.unfreeze_and_add_param_group(
+        ...                 modules=pl_module.feature_extractor,
+        ...                 optimizer=optimizer,
+        ...                 train_bn=True,
+        ...             )
+    """
+
+    def __init__(self):
+        self._internal_optimizer_metadata: Dict[int, List[Dict[str, Any]]] = {}
+        self._restarting = False
+
+    def state_dict(self) -> Dict[str, Any]:
+        return {
+            "internal_optimizer_metadata": self._internal_optimizer_metadata,
+        }
+
+    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
+        self._restarting = True
+        if "internal_optimizer_metadata" in state_dict:
+            self._internal_optimizer_metadata = state_dict["internal_optimizer_metadata"]
+        else:
+            # compatibility to load from old checkpoints before PR #11887
+            self._internal_optimizer_metadata = state_dict
+
+    def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        # restore the param_groups created during the previous training.
+        if self._restarting:
+            named_parameters = dict(pl_module.named_parameters())
+            for opt_idx, optimizer in enumerate(trainer.optimizers):
+                param_groups = self._apply_mapping_to_param_groups(
+                    self._internal_optimizer_metadata[opt_idx], named_parameters
+                )
+                optimizer.param_groups = param_groups
+            self._restarting = False
+
+    @staticmethod
+    def flatten_modules(modules: Union[Module, Iterable[Union[Module, Iterable]]]) -> List[Module]:
+        """This function is used to flatten a module or an iterable of modules into a list of its leaf modules
+        (modules with no children) and parent modules that have parameters directly themselves.
+
+        Args:
+            modules: A given module or an iterable of modules
+
+        Returns:
+            List of modules
+        """
+        if isinstance(modules, ModuleDict):
+            modules = modules.values()
+
+        if isinstance(modules, Iterable):
+            _modules = []
+            for m in modules:
+                _modules.extend(BaseFinetuning.flatten_modules(m))
+
+        else:
+            _modules = modules.modules()
+
+        # Capture all leaf modules as well as parent modules that have parameters directly themselves
+        return [m for m in _modules if not list(m.children()) or m._parameters]
+
+    @staticmethod
+    def filter_params(
+        modules: Union[Module, Iterable[Union[Module, Iterable]]], train_bn: bool = True, requires_grad: bool = True
+    ) -> Generator:
+        """Yields the `requires_grad` parameters of a given module or list of modules.
+
+        Args:
+            modules: A given module or an iterable of modules
+            train_bn: Whether to train BatchNorm module
+            requires_grad: Whether to create a generator for trainable or non-trainable parameters.
+        Returns:
+            Generator
+        """
+        modules = BaseFinetuning.flatten_modules(modules)
+        for mod in modules:
+            if isinstance(mod, _BatchNorm) and not train_bn:
+                continue
+            # recursion could yield duplicate parameters for parent modules w/ parameters so disabling it
+            for param in mod.parameters(recurse=False):
+                if param.requires_grad == requires_grad:
+                    yield param
+
+    @staticmethod
+    def make_trainable(modules: Union[Module, Iterable[Union[Module, Iterable]]]) -> None:
+        """Unfreezes the parameters of the provided modules.
+
+        Args:
+            modules: A given module or an iterable of modules
+        """
+        modules = BaseFinetuning.flatten_modules(modules)
+        for module in modules:
+            # recursion could yield duplicate parameters for parent modules w/ parameters so disabling it
+            for param in module.parameters(recurse=False):
+                param.requires_grad = True
+
+    @staticmethod
+    def freeze(modules: Union[Module, Iterable[Union[Module, Iterable]]], train_bn: bool = True) -> None:
+        """Freezes the parameters of the provided modules.
+
+        Args:
+            modules: A given module or an iterable of modules
+            train_bn: If True, leave the BatchNorm layers in training mode
+
+        Returns:
+            None
+        """
+        modules = BaseFinetuning.flatten_modules(modules)
+        for mod in modules:
|
| 183 |
+
if isinstance(mod, _BatchNorm) and train_bn:
|
| 184 |
+
BaseFinetuning.make_trainable(mod)
|
| 185 |
+
else:
|
| 186 |
+
# recursion could yield duplicate parameters for parent modules w/ parameters so disabling it
|
| 187 |
+
for param in mod.parameters(recurse=False):
|
| 188 |
+
param.requires_grad = False
|
| 189 |
+
|
| 190 |
+
@staticmethod
|
| 191 |
+
def filter_on_optimizer(optimizer: Optimizer, params: Iterable) -> List:
|
| 192 |
+
"""This function is used to exclude any parameter which already exists in this optimizer.
|
| 193 |
+
|
| 194 |
+
Args:
|
| 195 |
+
optimizer: Optimizer used for parameter exclusion
|
| 196 |
+
params: Iterable of parameters used to check against the provided optimizer
|
| 197 |
+
|
| 198 |
+
Returns:
|
| 199 |
+
List of parameters not contained in this optimizer param groups
|
| 200 |
+
"""
|
| 201 |
+
out_params = []
|
| 202 |
+
removed_params = []
|
| 203 |
+
for param in params:
|
| 204 |
+
if not any(torch.equal(p, param) for group in optimizer.param_groups for p in group["params"]):
|
| 205 |
+
out_params.append(param)
|
| 206 |
+
else:
|
| 207 |
+
removed_params.append(param)
|
| 208 |
+
|
| 209 |
+
if removed_params:
|
| 210 |
+
rank_zero_warn(
|
| 211 |
+
"The provided params to be frozen already exist within another group of this optimizer."
|
| 212 |
+
" Those parameters will be skipped.\n"
|
| 213 |
+
"HINT: Did you init your optimizer in `configure_optimizer` as such:\n"
|
| 214 |
+
f" {type(optimizer)}(filter(lambda p: p.requires_grad, self.parameters()), ...) ",
|
| 215 |
+
)
|
| 216 |
+
return out_params
|
| 217 |
+
|
| 218 |
+
@staticmethod
|
| 219 |
+
def unfreeze_and_add_param_group(
|
| 220 |
+
modules: Union[Module, Iterable[Union[Module, Iterable]]],
|
| 221 |
+
optimizer: Optimizer,
|
| 222 |
+
lr: Optional[float] = None,
|
| 223 |
+
initial_denom_lr: float = 10.0,
|
| 224 |
+
train_bn: bool = True,
|
| 225 |
+
) -> None:
|
| 226 |
+
"""Unfreezes a module and adds its parameters to an optimizer.
|
| 227 |
+
|
| 228 |
+
Args:
|
| 229 |
+
modules: A module or iterable of modules to unfreeze.
|
| 230 |
+
Their parameters will be added to an optimizer as a new param group.
|
| 231 |
+
optimizer: The provided optimizer will receive new parameters and will add them to
|
| 232 |
+
`add_param_group`
|
| 233 |
+
lr: Learning rate for the new param group.
|
| 234 |
+
initial_denom_lr: If no lr is provided, the learning from the first param group will be used
|
| 235 |
+
and divided by `initial_denom_lr`.
|
| 236 |
+
train_bn: Whether to train the BatchNormalization layers.
|
| 237 |
+
"""
|
| 238 |
+
BaseFinetuning.make_trainable(modules)
|
| 239 |
+
params_lr = optimizer.param_groups[0]["lr"] if lr is None else float(lr)
|
| 240 |
+
denom_lr = initial_denom_lr if lr is None else 1.0
|
| 241 |
+
params = BaseFinetuning.filter_params(modules, train_bn=train_bn, requires_grad=True)
|
| 242 |
+
params = BaseFinetuning.filter_on_optimizer(optimizer, params)
|
| 243 |
+
if params:
|
| 244 |
+
optimizer.add_param_group({"params": params, "lr": params_lr / denom_lr})
|
| 245 |
+
|
| 246 |
+
def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
|
| 247 |
+
self.freeze_before_training(pl_module)
|
| 248 |
+
|
| 249 |
+
@staticmethod
|
| 250 |
+
def _apply_mapping_to_param_groups(param_groups: List[Dict[str, Any]], mapping: dict) -> List[Dict[str, Any]]:
|
| 251 |
+
output = []
|
| 252 |
+
for g in param_groups:
|
| 253 |
+
# skip params to save memory
|
| 254 |
+
group_state = {k: v for k, v in g.items() if k != "params"}
|
| 255 |
+
group_state["params"] = [mapping[p] for p in g["params"]]
|
| 256 |
+
output.append(group_state)
|
| 257 |
+
return output
|
| 258 |
+
|
| 259 |
+
def _store(
|
| 260 |
+
self,
|
| 261 |
+
pl_module: "pl.LightningModule",
|
| 262 |
+
opt_idx: int,
|
| 263 |
+
num_param_groups: int,
|
| 264 |
+
current_param_groups: List[Dict[str, Any]],
|
| 265 |
+
) -> None:
|
| 266 |
+
mapping = {p: n for n, p in pl_module.named_parameters()}
|
| 267 |
+
if opt_idx not in self._internal_optimizer_metadata:
|
| 268 |
+
self._internal_optimizer_metadata[opt_idx] = self._apply_mapping_to_param_groups(
|
| 269 |
+
current_param_groups, mapping
|
| 270 |
+
)
|
| 271 |
+
elif num_param_groups != len(current_param_groups):
|
| 272 |
+
# save new param_groups possibly created by the users.
|
| 273 |
+
self._internal_optimizer_metadata[opt_idx].extend(
|
| 274 |
+
self._apply_mapping_to_param_groups(current_param_groups[num_param_groups:], mapping)
|
| 275 |
+
)
|
| 276 |
+
|
| 277 |
+
def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 278 |
+
"""Called when the epoch begins."""
|
| 279 |
+
# import is here to avoid circular imports
|
| 280 |
+
from pytorch_lightning.loops.utilities import _get_active_optimizers
|
| 281 |
+
|
| 282 |
+
for opt_idx, optimizer in _get_active_optimizers(trainer.optimizers, trainer.optimizer_frequencies):
|
| 283 |
+
num_param_groups = len(optimizer.param_groups)
|
| 284 |
+
self.finetune_function(pl_module, trainer.current_epoch, optimizer, opt_idx)
|
| 285 |
+
current_param_groups = optimizer.param_groups
|
| 286 |
+
self._store(pl_module, opt_idx, num_param_groups, current_param_groups)
|
| 287 |
+
|
| 288 |
+
def finetune_function(
|
| 289 |
+
self, pl_module: "pl.LightningModule", epoch: int, optimizer: Optimizer, opt_idx: int
|
| 290 |
+
) -> None:
|
| 291 |
+
"""Override to add your unfreeze logic."""
|
| 292 |
+
raise NotImplementedError
|
| 293 |
+
|
| 294 |
+
def freeze_before_training(self, pl_module: "pl.LightningModule") -> None:
|
| 295 |
+
"""Override to add your freeze logic."""
|
| 296 |
+
raise NotImplementedError
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
class BackboneFinetuning(BaseFinetuning):
|
| 300 |
+
r"""Finetune a backbone model based on a learning rate user-defined scheduling.
|
| 301 |
+
|
| 302 |
+
When the backbone learning rate reaches the current model learning rate
|
| 303 |
+
and ``should_align`` is set to True, it will align with it for the rest of the training.
|
| 304 |
+
|
| 305 |
+
Args:
|
| 306 |
+
unfreeze_backbone_at_epoch: Epoch at which the backbone will be unfreezed.
|
| 307 |
+
lambda_func: Scheduling function for increasing backbone learning rate.
|
| 308 |
+
backbone_initial_ratio_lr:
|
| 309 |
+
Used to scale down the backbone learning rate compared to rest of model
|
| 310 |
+
backbone_initial_lr: Optional, Initial learning rate for the backbone.
|
| 311 |
+
By default, we will use ``current_learning / backbone_initial_ratio_lr``
|
| 312 |
+
should_align: Whether to align with current learning rate when backbone learning
|
| 313 |
+
reaches it.
|
| 314 |
+
initial_denom_lr: When unfreezing the backbone, the initial learning rate will
|
| 315 |
+
``current_learning_rate / initial_denom_lr``.
|
| 316 |
+
train_bn: Whether to make Batch Normalization trainable.
|
| 317 |
+
verbose: Display current learning rate for model and backbone
|
| 318 |
+
rounding: Precision for displaying learning rate
|
| 319 |
+
|
| 320 |
+
Example::
|
| 321 |
+
|
| 322 |
+
>>> from pytorch_lightning import Trainer
|
| 323 |
+
>>> from pytorch_lightning.callbacks import BackboneFinetuning
|
| 324 |
+
>>> multiplicative = lambda epoch: 1.5
|
| 325 |
+
>>> backbone_finetuning = BackboneFinetuning(200, multiplicative)
|
| 326 |
+
>>> trainer = Trainer(callbacks=[backbone_finetuning])
|
| 327 |
+
|
| 328 |
+
"""
|
| 329 |
+
|
| 330 |
+
def __init__(
|
| 331 |
+
self,
|
| 332 |
+
unfreeze_backbone_at_epoch: int = 10,
|
| 333 |
+
lambda_func: Callable = multiplicative,
|
| 334 |
+
backbone_initial_ratio_lr: float = 10e-2,
|
| 335 |
+
backbone_initial_lr: Optional[float] = None,
|
| 336 |
+
should_align: bool = True,
|
| 337 |
+
initial_denom_lr: float = 10.0,
|
| 338 |
+
train_bn: bool = True,
|
| 339 |
+
verbose: bool = False,
|
| 340 |
+
rounding: int = 12,
|
| 341 |
+
) -> None:
|
| 342 |
+
super().__init__()
|
| 343 |
+
|
| 344 |
+
self.unfreeze_backbone_at_epoch: int = unfreeze_backbone_at_epoch
|
| 345 |
+
self.lambda_func: Callable = lambda_func
|
| 346 |
+
self.backbone_initial_ratio_lr: float = backbone_initial_ratio_lr
|
| 347 |
+
self.backbone_initial_lr: Optional[float] = backbone_initial_lr
|
| 348 |
+
self.should_align: bool = should_align
|
| 349 |
+
self.initial_denom_lr: float = initial_denom_lr
|
| 350 |
+
self.train_bn: bool = train_bn
|
| 351 |
+
self.verbose: bool = verbose
|
| 352 |
+
self.rounding: int = rounding
|
| 353 |
+
self.previous_backbone_lr: Optional[float] = None
|
| 354 |
+
|
| 355 |
+
def state_dict(self) -> Dict[str, Any]:
|
| 356 |
+
return {
|
| 357 |
+
"internal_optimizer_metadata": self._internal_optimizer_metadata,
|
| 358 |
+
"previous_backbone_lr": self.previous_backbone_lr,
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
|
| 362 |
+
self.previous_backbone_lr = state_dict["previous_backbone_lr"]
|
| 363 |
+
super().load_state_dict(state_dict)
|
| 364 |
+
|
| 365 |
+
def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 366 |
+
"""
|
| 367 |
+
Raises:
|
| 368 |
+
MisconfigurationException:
|
| 369 |
+
If LightningModule has no nn.Module `backbone` attribute.
|
| 370 |
+
"""
|
| 371 |
+
if hasattr(pl_module, "backbone") and isinstance(pl_module.backbone, Module):
|
| 372 |
+
return super().on_fit_start(trainer, pl_module)
|
| 373 |
+
raise MisconfigurationException("The LightningModule should have a nn.Module `backbone` attribute")
|
| 374 |
+
|
| 375 |
+
def freeze_before_training(self, pl_module: "pl.LightningModule") -> None:
|
| 376 |
+
self.freeze(pl_module.backbone)
|
| 377 |
+
|
| 378 |
+
def finetune_function(
|
| 379 |
+
self, pl_module: "pl.LightningModule", epoch: int, optimizer: Optimizer, opt_idx: int
|
| 380 |
+
) -> None:
|
| 381 |
+
"""Called when the epoch begins."""
|
| 382 |
+
if epoch == self.unfreeze_backbone_at_epoch:
|
| 383 |
+
current_lr = optimizer.param_groups[0]["lr"]
|
| 384 |
+
initial_backbone_lr = (
|
| 385 |
+
self.backbone_initial_lr
|
| 386 |
+
if self.backbone_initial_lr is not None
|
| 387 |
+
else current_lr * self.backbone_initial_ratio_lr
|
| 388 |
+
)
|
| 389 |
+
self.previous_backbone_lr = initial_backbone_lr
|
| 390 |
+
self.unfreeze_and_add_param_group(
|
| 391 |
+
pl_module.backbone,
|
| 392 |
+
optimizer,
|
| 393 |
+
initial_backbone_lr,
|
| 394 |
+
train_bn=self.train_bn,
|
| 395 |
+
initial_denom_lr=self.initial_denom_lr,
|
| 396 |
+
)
|
| 397 |
+
if self.verbose:
|
| 398 |
+
log.info(
|
| 399 |
+
f"Current lr: {round(current_lr, self.rounding)}, "
|
| 400 |
+
f"Backbone lr: {round(initial_backbone_lr, self.rounding)}"
|
| 401 |
+
)
|
| 402 |
+
|
| 403 |
+
elif epoch > self.unfreeze_backbone_at_epoch:
|
| 404 |
+
current_lr = optimizer.param_groups[0]["lr"]
|
| 405 |
+
next_current_backbone_lr = self.lambda_func(epoch + 1) * self.previous_backbone_lr
|
| 406 |
+
next_current_backbone_lr = (
|
| 407 |
+
current_lr
|
| 408 |
+
if (self.should_align and next_current_backbone_lr > current_lr)
|
| 409 |
+
else next_current_backbone_lr
|
| 410 |
+
)
|
| 411 |
+
optimizer.param_groups[-1]["lr"] = next_current_backbone_lr
|
| 412 |
+
self.previous_backbone_lr = next_current_backbone_lr
|
| 413 |
+
if self.verbose:
|
| 414 |
+
log.info(
|
| 415 |
+
f"Current lr: {round(current_lr, self.rounding)}, "
|
| 416 |
+
f"Backbone lr: {round(next_current_backbone_lr, self.rounding)}"
|
| 417 |
+
)
|
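A minimal usage sketch for the file above (not part of the vendored source; the `LitClassifier` module and its `backbone`/`head` split are hypothetical). `BackboneFinetuning` freezes `pl_module.backbone` before training and unfreezes it into a new optimizer param group at the chosen epoch:

import torch
from torch import nn
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import BackboneFinetuning


class LitClassifier(pl.LightningModule):
    # hypothetical example module: BackboneFinetuning only requires a `backbone` nn.Module attribute
    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(nn.Linear(32, 64), nn.ReLU())
        self.head = nn.Linear(64, 10)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return nn.functional.cross_entropy(self.head(self.backbone(x)), y)

    def configure_optimizers(self):
        # filter on `requires_grad` so the frozen backbone is excluded from the initial param group
        return torch.optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=1e-3)


# unfreeze the backbone at epoch 5, then grow its learning rate by 1.5x per epoch afterwards
backbone_finetuning = BackboneFinetuning(unfreeze_backbone_at_epoch=5, lambda_func=lambda epoch: 1.5)
trainer = Trainer(callbacks=[backbone_finetuning])
# trainer.fit(LitClassifier(), train_dataloaders=...)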
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/gpu_stats_monitor.py
ADDED
@@ -0,0 +1,262 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
GPU Stats Monitor
=================

Monitor and logs GPU stats during training.

"""

import os
import shutil
import subprocess
import time
from typing import Any, Dict, List, Optional, Tuple

import torch

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.parsing import AttributeDict
from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_only
from pytorch_lightning.utilities.types import STEP_OUTPUT


class GPUStatsMonitor(Callback):
    r"""
    .. deprecated:: v1.5
        The `GPUStatsMonitor` callback was deprecated in v1.5 and will be removed in v1.7.
        Please use the `DeviceStatsMonitor` callback instead.

    Automatically monitors and logs GPU stats during training stage. ``GPUStatsMonitor``
    is a callback and in order to use it you need to assign a logger in the ``Trainer``.

    Args:
        memory_utilization: Set to ``True`` to monitor used, free and percentage of memory
            utilization at the start and end of each step. Default: ``True``.
        gpu_utilization: Set to ``True`` to monitor percentage of GPU utilization
            at the start and end of each step. Default: ``True``.
        intra_step_time: Set to ``True`` to monitor the time of each step. Default: ``False``.
        inter_step_time: Set to ``True`` to monitor the time between the end of one step
            and the start of the next step. Default: ``False``.
        fan_speed: Set to ``True`` to monitor percentage of fan speed. Default: ``False``.
        temperature: Set to ``True`` to monitor the memory and gpu temperature in degree Celsius.
            Default: ``False``.

    Raises:
        MisconfigurationException:
            If NVIDIA driver is not installed, not running on GPUs, or ``Trainer`` has no logger.

    Example::

        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.callbacks import GPUStatsMonitor
        >>> gpu_stats = GPUStatsMonitor() # doctest: +SKIP
        >>> trainer = Trainer(callbacks=[gpu_stats]) # doctest: +SKIP

    GPU stats are mainly based on `nvidia-smi --query-gpu` command. The description of the queries is as follows:

    - **fan.speed** – The fan speed value is the percent of maximum speed that the device's fan is currently
      intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed.
      If the fan is physically blocked and unable to spin, this output will not match the actual fan speed.
      Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.
    - **memory.used** – Total memory allocated by active contexts.
    - **memory.free** – Total free memory.
    - **utilization.gpu** – Percent of time over the past sample period during which one or more kernels was
      executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product.
    - **utilization.memory** – Percent of time over the past sample period during which global (device) memory was
      being read or written. The sample period may be between 1 second and 1/6 second depending on the product.
    - **temperature.gpu** – Core GPU temperature, in degrees C.
    - **temperature.memory** – HBM memory temperature, in degrees C.

    """

    def __init__(
        self,
        memory_utilization: bool = True,
        gpu_utilization: bool = True,
        intra_step_time: bool = False,
        inter_step_time: bool = False,
        fan_speed: bool = False,
        temperature: bool = False,
    ):
        super().__init__()

        rank_zero_deprecation(
            "The `GPUStatsMonitor` callback was deprecated in v1.5 and will be removed in v1.7."
            " Please use the `DeviceStatsMonitor` callback instead."
        )

        if shutil.which("nvidia-smi") is None:
            raise MisconfigurationException(
                "Cannot use GPUStatsMonitor callback because NVIDIA driver is not installed."
            )

        self._log_stats = AttributeDict(
            {
                "memory_utilization": memory_utilization,
                "gpu_utilization": gpu_utilization,
                "intra_step_time": intra_step_time,
                "inter_step_time": inter_step_time,
                "fan_speed": fan_speed,
                "temperature": temperature,
            }
        )

        # The logical device IDs for selected devices
        self._device_ids: List[int] = []  # will be assigned later in setup()

        # The unmasked real GPU IDs
        self._gpu_ids: List[str] = []  # will be assigned later in setup()

    def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
        if not trainer.loggers:
            raise MisconfigurationException("Cannot use GPUStatsMonitor callback with Trainer that has no logger.")

        if trainer.strategy.root_device.type != "cuda":
            raise MisconfigurationException(
                "You are using GPUStatsMonitor but are not running on GPU."
                f" The root device type is {trainer.strategy.root_device.type}."
            )

        # The logical device IDs for selected devices
        self._device_ids = sorted(set(trainer.device_ids))

        # The unmasked real GPU IDs
        self._gpu_ids = self._get_gpu_ids(self._device_ids)

    def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        self._snap_intra_step_time: Optional[float] = None
        self._snap_inter_step_time: Optional[float] = None

    @rank_zero_only
    def on_train_batch_start(
        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", batch: Any, batch_idx: int
    ) -> None:
        if self._log_stats.intra_step_time:
            self._snap_intra_step_time = time.time()

        if not trainer._logger_connector.should_update_logs:
            return

        gpu_stat_keys = self._get_gpu_stat_keys()
        gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys])
        logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys)

        if self._log_stats.inter_step_time and self._snap_inter_step_time:
            # First log at beginning of second step
            logs["batch_time/inter_step (ms)"] = (time.time() - self._snap_inter_step_time) * 1000

        for logger in trainer.loggers:
            logger.log_metrics(logs, step=trainer.fit_loop.epoch_loop._batches_that_stepped)

    @rank_zero_only
    def on_train_batch_end(
        self,
        trainer: "pl.Trainer",
        pl_module: "pl.LightningModule",
        outputs: STEP_OUTPUT,
        batch: Any,
        batch_idx: int,
    ) -> None:
        if self._log_stats.inter_step_time:
            self._snap_inter_step_time = time.time()

        if not trainer._logger_connector.should_update_logs:
            return

        gpu_stat_keys = self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys()
        gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys])
        logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys)

        if self._log_stats.intra_step_time and self._snap_intra_step_time:
            logs["batch_time/intra_step (ms)"] = (time.time() - self._snap_intra_step_time) * 1000

        for logger in trainer.loggers:
            logger.log_metrics(logs, step=trainer.fit_loop.epoch_loop._batches_that_stepped)

    @staticmethod
    def _get_gpu_ids(device_ids: List[int]) -> List[str]:
        """Get the unmasked real GPU IDs."""
        # All devices if `CUDA_VISIBLE_DEVICES` unset
        default = ",".join(str(i) for i in range(torch.cuda.device_count()))
        cuda_visible_devices: List[str] = os.getenv("CUDA_VISIBLE_DEVICES", default=default).split(",")
        return [cuda_visible_devices[device_id].strip() for device_id in device_ids]

    def _get_gpu_stats(self, queries: List[str]) -> List[List[float]]:
        if not queries:
            return []

        """Run nvidia-smi to get the gpu stats"""
        gpu_query = ",".join(queries)
        format = "csv,nounits,noheader"
        gpu_ids = ",".join(self._gpu_ids)
        result = subprocess.run(
            [
                # it's ok to suppress the warning here since we ensure nvidia-smi exists during init
                shutil.which("nvidia-smi"),  # type: ignore
                f"--query-gpu={gpu_query}",
                f"--format={format}",
                f"--id={gpu_ids}",
            ],
            encoding="utf-8",
            capture_output=True,
            check=True,
        )

        def _to_float(x: str) -> float:
            try:
                return float(x)
            except ValueError:
                return 0.0

        stats = [[_to_float(x) for x in s.split(", ")] for s in result.stdout.strip().split(os.linesep)]
        return stats

    @staticmethod
    def _parse_gpu_stats(
        device_ids: List[int], stats: List[List[float]], keys: List[Tuple[str, str]]
    ) -> Dict[str, float]:
        """Parse the gpu stats into a loggable dict."""
        logs = {}
        for i, device_id in enumerate(device_ids):
            for j, (x, unit) in enumerate(keys):
                logs[f"device_id: {device_id}/{x} ({unit})"] = stats[i][j]
        return logs

    def _get_gpu_stat_keys(self) -> List[Tuple[str, str]]:
        """Get the GPU stats keys."""
        stat_keys = []

        if self._log_stats.gpu_utilization:
            stat_keys.append(("utilization.gpu", "%"))

        if self._log_stats.memory_utilization:
            stat_keys.extend([("memory.used", "MB"), ("memory.free", "MB"), ("utilization.memory", "%")])

        return stat_keys

    def _get_gpu_device_stat_keys(self) -> List[Tuple[str, str]]:
        """Get the device stats keys."""
        stat_keys = []

        if self._log_stats.fan_speed:
            stat_keys.append(("fan.speed", "%"))

        if self._log_stats.temperature:
            stat_keys.extend([("temperature.gpu", "°C"), ("temperature.memory", "°C")])

        return stat_keys
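For illustration only (made-up numbers, not part of the vendored file): the dictionary produced by `GPUStatsMonitor._parse_gpu_stats` keys each value by device id, query name and unit, which is exactly what gets passed to the loggers:

# one row of nvidia-smi output per selected device, one column per query
device_ids = [0, 1]
keys = [("utilization.gpu", "%"), ("memory.used", "MB")]
stats = [[63.0, 4100.0], [12.0, 900.0]]  # hypothetical sampled values

logs = {
    f"device_id: {device_id}/{query} ({unit})": stats[i][j]
    for i, device_id in enumerate(device_ids)
    for j, (query, unit) in enumerate(keys)
}
# {'device_id: 0/utilization.gpu (%)': 63.0, 'device_id: 0/memory.used (MB)': 4100.0,
#  'device_id: 1/utilization.gpu (%)': 12.0, 'device_id: 1/memory.used (MB)': 900.0}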
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/lambda_function.py
ADDED
@@ -0,0 +1,96 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
Lambda Callback
^^^^^^^^^^^^^^^

Create a simple callback on the fly using lambda functions.

"""

from typing import Callable, Optional

from pytorch_lightning.callbacks.base import Callback


class LambdaCallback(Callback):
    r"""
    Create a simple callback on the fly using lambda functions.

    Args:
        **kwargs: hooks supported by :class:`~pytorch_lightning.callbacks.base.Callback`

    Example::

        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.callbacks import LambdaCallback
        >>> trainer = Trainer(callbacks=[LambdaCallback(setup=lambda *args: print('setup'))])
    """

    def __init__(
        self,
        on_before_accelerator_backend_setup: Optional[Callable] = None,
        setup: Optional[Callable] = None,
        on_configure_sharded_model: Optional[Callable] = None,
        teardown: Optional[Callable] = None,
        on_init_start: Optional[Callable] = None,
        on_init_end: Optional[Callable] = None,
        on_fit_start: Optional[Callable] = None,
        on_fit_end: Optional[Callable] = None,
        on_sanity_check_start: Optional[Callable] = None,
        on_sanity_check_end: Optional[Callable] = None,
        on_train_batch_start: Optional[Callable] = None,
        on_train_batch_end: Optional[Callable] = None,
        on_train_epoch_start: Optional[Callable] = None,
        on_train_epoch_end: Optional[Callable] = None,
        on_validation_epoch_start: Optional[Callable] = None,
        on_validation_epoch_end: Optional[Callable] = None,
        on_test_epoch_start: Optional[Callable] = None,
        on_test_epoch_end: Optional[Callable] = None,
        on_epoch_start: Optional[Callable] = None,
        on_epoch_end: Optional[Callable] = None,
        on_batch_start: Optional[Callable] = None,
        on_validation_batch_start: Optional[Callable] = None,
        on_validation_batch_end: Optional[Callable] = None,
        on_test_batch_start: Optional[Callable] = None,
        on_test_batch_end: Optional[Callable] = None,
        on_batch_end: Optional[Callable] = None,
        on_train_start: Optional[Callable] = None,
        on_train_end: Optional[Callable] = None,
        on_pretrain_routine_start: Optional[Callable] = None,
        on_pretrain_routine_end: Optional[Callable] = None,
        on_validation_start: Optional[Callable] = None,
        on_validation_end: Optional[Callable] = None,
        on_test_start: Optional[Callable] = None,
        on_test_end: Optional[Callable] = None,
        on_keyboard_interrupt: Optional[Callable] = None,
        on_exception: Optional[Callable] = None,
        on_save_checkpoint: Optional[Callable] = None,
        on_load_checkpoint: Optional[Callable] = None,
        on_before_backward: Optional[Callable] = None,
        on_after_backward: Optional[Callable] = None,
        on_before_optimizer_step: Optional[Callable] = None,
        on_before_zero_grad: Optional[Callable] = None,
        on_predict_start: Optional[Callable] = None,
        on_predict_end: Optional[Callable] = None,
        on_predict_batch_start: Optional[Callable] = None,
        on_predict_batch_end: Optional[Callable] = None,
        on_predict_epoch_start: Optional[Callable] = None,
        on_predict_epoch_end: Optional[Callable] = None,
    ):
        for k, v in locals().items():
            if k == "self":
                continue
            if v is not None:
                setattr(self, k, v)
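A short usage sketch (not part of the vendored file; the hook bodies are illustrative): `LambdaCallback` simply assigns each provided callable as the hook of the same name, so ad-hoc behaviour can be attached to several hooks without subclassing `Callback`:

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import LambdaCallback

# each keyword matches a Callback hook name; the lambdas receive that hook's arguments
debug_callback = LambdaCallback(
    on_fit_start=lambda trainer, pl_module: print("fit is starting"),
    on_train_epoch_start=lambda trainer, pl_module: print(f"epoch {trainer.current_epoch} begins"),
    on_fit_end=lambda trainer, pl_module: print("fit is done"),
)
trainer = Trainer(callbacks=[debug_callback])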
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/lr_monitor.py
ADDED
@@ -0,0 +1,354 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""

Learning Rate Monitor
=====================

Monitor and logs learning rate for lr schedulers during training.

"""
import itertools
from collections import defaultdict
from typing import Any, DefaultDict, Dict, List, Optional, Set, Tuple, Type

from torch.optim.optimizer import Optimizer

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_warn
from pytorch_lightning.utilities.types import LRSchedulerConfig


class LearningRateMonitor(Callback):
    r"""
    Automatically monitor and logs learning rate for learning rate schedulers during training.

    Args:
        logging_interval: set to ``'epoch'`` or ``'step'`` to log ``lr`` of all optimizers
            at the same interval, set to ``None`` to log at individual interval
            according to the ``interval`` key of each scheduler. Defaults to ``None``.
        log_momentum: option to also log the momentum values of the optimizer, if the optimizer
            has the ``momentum`` or ``betas`` attribute. Defaults to ``False``.

    Raises:
        MisconfigurationException:
            If ``logging_interval`` is none of ``"step"``, ``"epoch"``, or ``None``.

    Example::

        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.callbacks import LearningRateMonitor
        >>> lr_monitor = LearningRateMonitor(logging_interval='step')
        >>> trainer = Trainer(callbacks=[lr_monitor])

    Logging names are automatically determined based on optimizer class name.
    In case of multiple optimizers of same type, they will be named ``Adam``,
    ``Adam-1`` etc. If a optimizer has multiple parameter groups they will
    be named ``Adam/pg1``, ``Adam/pg2`` etc. To control naming, pass in a
    ``name`` keyword in the construction of the learning rate schedulers.
    A ``name`` keyword can also be used for parameter groups in the
    construction of the optimizer.

    Example::

        def configure_optimizer(self):
            optimizer = torch.optim.Adam(...)
            lr_scheduler = {
                'scheduler': torch.optim.lr_scheduler.LambdaLR(optimizer, ...)
                'name': 'my_logging_name'
            }
            return [optimizer], [lr_scheduler]

    Example::

        def configure_optimizer(self):
            optimizer = torch.optim.SGD(
                [{
                    'params': [p for p in self.parameters()],
                    'name': 'my_parameter_group_name'
                }],
                lr=0.1
            )
            lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, ...)
            return [optimizer], [lr_scheduler]

    """

    def __init__(self, logging_interval: Optional[str] = None, log_momentum: bool = False) -> None:
        if logging_interval not in (None, "step", "epoch"):
            raise MisconfigurationException("logging_interval should be `step` or `epoch` or `None`.")

        self.logging_interval = logging_interval
        self.log_momentum = log_momentum
        self.lrs: Dict[str, List[float]] = {}
        self._lr_sch_names: List[str] = []

    def on_train_start(self, trainer: "pl.Trainer", *args: Any, **kwargs: Any) -> None:
        """Called before training, determines unique names for all lr schedulers in the case of multiple of the
        same type or in the case of multiple parameter groups.

        Raises:
            MisconfigurationException:
                If ``Trainer`` has no ``logger``.
        """
        if not trainer.loggers:
            raise MisconfigurationException(
                "Cannot use `LearningRateMonitor` callback with `Trainer` that has no logger."
            )

        if self.log_momentum:

            def _check_no_key(key: str) -> bool:
                if trainer.lr_scheduler_configs:
                    return any(
                        key not in config.scheduler.optimizer.defaults for config in trainer.lr_scheduler_configs
                    )

                return any(key not in optimizer.defaults for optimizer in trainer.optimizers)

            if _check_no_key("momentum") and _check_no_key("betas"):
                rank_zero_warn(
                    "You have set log_momentum=True, but some optimizers do not"
                    " have momentum. This will log a value 0 for the momentum.",
                    category=RuntimeWarning,
                )

        # Find names for schedulers
        names: List[List[str]] = []
        (
            sched_hparam_keys,
            optimizers_with_scheduler,
            optimizers_with_scheduler_types,
        ) = self._find_names_from_schedulers(trainer.lr_scheduler_configs)
        names.extend(sched_hparam_keys)

        # Find names for leftover optimizers
        optimizer_hparam_keys, _ = self._find_names_from_optimizers(
            trainer.optimizers,
            seen_optimizers=optimizers_with_scheduler,
            seen_optimizer_types=optimizers_with_scheduler_types,
        )
        names.extend(optimizer_hparam_keys)

        # Initialize for storing values
        names_flatten = list(itertools.chain.from_iterable(names))
        self.lrs = {name: [] for name in names_flatten}
        self.last_momentum_values = {name + "-momentum": None for name in names_flatten}

    def on_train_batch_start(self, trainer: "pl.Trainer", *args: Any, **kwargs: Any) -> None:
        if not trainer._logger_connector.should_update_logs:
            return

        if self.logging_interval != "epoch":
            interval = "step" if self.logging_interval is None else "any"
            latest_stat = self._extract_stats(trainer, interval)

            if latest_stat:
                for logger in trainer.loggers:
                    logger.log_metrics(latest_stat, step=trainer.fit_loop.epoch_loop._batches_that_stepped)

    def on_train_epoch_start(self, trainer: "pl.Trainer", *args: Any, **kwargs: Any) -> None:
        if self.logging_interval != "step":
            interval = "epoch" if self.logging_interval is None else "any"
            latest_stat = self._extract_stats(trainer, interval)

            if latest_stat:
                for logger in trainer.loggers:
                    logger.log_metrics(latest_stat, step=trainer.fit_loop.epoch_loop._batches_that_stepped)

    def _extract_stats(self, trainer: "pl.Trainer", interval: str) -> Dict[str, float]:
        latest_stat = {}

        (
            scheduler_hparam_keys,
            optimizers_with_scheduler,
            optimizers_with_scheduler_types,
        ) = self._find_names_from_schedulers(trainer.lr_scheduler_configs, add_lr_sch_names=False)
        self._remap_keys(scheduler_hparam_keys)

        for name, config in zip(scheduler_hparam_keys, trainer.lr_scheduler_configs):
            if interval in [config.interval, "any"]:
                opt = config.scheduler.optimizer
                current_stat = self._get_lr_momentum_stat(opt, name)
                latest_stat.update(current_stat)

        optimizer_hparam_keys, optimizers_without_scheduler = self._find_names_from_optimizers(
            trainer.optimizers,
            seen_optimizers=optimizers_with_scheduler,
            seen_optimizer_types=optimizers_with_scheduler_types,
            add_lr_sch_names=False,
        )
        self._remap_keys(optimizer_hparam_keys)

        for opt, names in zip(optimizers_without_scheduler, optimizer_hparam_keys):
            current_stat = self._get_lr_momentum_stat(opt, names)
            latest_stat.update(current_stat)

        return latest_stat

    def _get_lr_momentum_stat(self, optimizer: Optimizer, names: List[str]) -> Dict[str, float]:
        lr_momentum_stat = {}
        param_groups = optimizer.param_groups
        use_betas = "betas" in optimizer.defaults

        for pg, name in zip(param_groups, names):
            lr = self._extract_lr(pg, name)
            lr_momentum_stat.update(lr)
            momentum = self._extract_momentum(
                param_group=pg, name=name.replace(name, f"{name}-momentum"), use_betas=use_betas
            )
            lr_momentum_stat.update(momentum)

        return lr_momentum_stat

    def _extract_lr(self, param_group: Dict[str, Any], name: str) -> Dict[str, Any]:
        lr = param_group["lr"]
        self.lrs[name].append(lr)
        return {name: lr}

    def _remap_keys(self, names: List[List[str]], token: str = "/pg1") -> None:
        """This function is used the remap the keys if param groups for a given optimizer increased."""
        for group_new_names in names:
            for new_name in group_new_names:
                old_name = new_name.replace(token, "")
                if token in new_name and old_name in self.lrs:
                    self.lrs[new_name] = self.lrs.pop(old_name)
                elif new_name not in self.lrs:
                    self.lrs[new_name] = []

    def _extract_momentum(self, param_group: Dict[str, List], name: str, use_betas: bool) -> Dict[str, float]:
        if not self.log_momentum:
            return {}

        momentum = param_group["betas"][0] if use_betas else param_group.get("momentum", 0)
        self.last_momentum_values[name] = momentum
        return {name: momentum}

    def _add_prefix(
        self, name: str, optimizer_cls: Type[Optimizer], seen_optimizer_types: DefaultDict[Type[Optimizer], int]
    ) -> str:
        if optimizer_cls not in seen_optimizer_types:
            return name
        count = seen_optimizer_types[optimizer_cls]
        return name + f"-{count - 1}" if count > 1 else name

    def _add_suffix(self, name: str, param_groups: List[Dict], param_group_index: int, use_names: bool = True) -> str:
        if len(param_groups) > 1:
            if not use_names:
                return f"{name}/pg{param_group_index+1}"
            pg_name = param_groups[param_group_index].get("name", f"pg{param_group_index+1}")
            return f"{name}/{pg_name}"
        elif use_names:
            pg_name = param_groups[param_group_index].get("name")
            return f"{name}/{pg_name}" if pg_name else name
        return name

    def _duplicate_param_group_names(self, param_groups: List[Dict]) -> Set[str]:
        names = [pg.get("name", f"pg{i}") for i, pg in enumerate(param_groups, start=1)]
        unique = set(names)
        if len(names) == len(unique):
            return set()
        return {n for n in names if names.count(n) > 1}

    def _find_names_from_schedulers(
        self, lr_scheduler_configs: List[LRSchedulerConfig], add_lr_sch_names: bool = True
    ) -> Tuple[List[List[str]], List[Optimizer], DefaultDict[Type[Optimizer], int]]:
        # Create unique names in the case we have multiple of the same learning
        # rate scheduler + multiple parameter groups
        names = []
        seen_optimizers: List[Optimizer] = []
        seen_optimizer_types: DefaultDict[Type[Optimizer], int] = defaultdict(int)
        for config in lr_scheduler_configs:
            sch = config.scheduler
            if config.name is not None:
                name = config.name
            else:
                name = "lr-" + sch.optimizer.__class__.__name__

            updated_names = self._check_duplicates_and_update_name(
                sch.optimizer, name, seen_optimizers, seen_optimizer_types, config, add_lr_sch_names
            )
            names.append(updated_names)

        return names, seen_optimizers, seen_optimizer_types

    def _find_names_from_optimizers(
        self,
        optimizers: List[Any],
        seen_optimizers: List[Optimizer],
        seen_optimizer_types: DefaultDict[Type[Optimizer], int],
        add_lr_sch_names: bool = True,
    ) -> Tuple[List[List[str]], List[Optimizer]]:
        names = []
        optimizers_without_scheduler = []

        for optimizer in optimizers:
            # Deepspeed optimizer wraps the native optimizer
            optimizer = optimizer.optimizer if hasattr(optimizer, "optimizer") else optimizer
            if optimizer in seen_optimizers:
                continue

            name = "lr-" + optimizer.__class__.__name__
            updated_names = self._check_duplicates_and_update_name(
                optimizer, name, seen_optimizers, seen_optimizer_types, None, add_lr_sch_names
            )
            names.append(updated_names)
            optimizers_without_scheduler.append(optimizer)

        return names, optimizers_without_scheduler

    def _check_duplicates_and_update_name(
        self,
        optimizer: Optimizer,
        name: str,
        seen_optimizers: List[Optimizer],
        seen_optimizer_types: DefaultDict[Type[Optimizer], int],
        lr_scheduler_config: Optional[LRSchedulerConfig],
        add_lr_sch_names: bool = True,
    ) -> List[str]:
        seen_optimizers.append(optimizer)
        optimizer_cls = type(optimizer)
        if lr_scheduler_config is not None and lr_scheduler_config.name is None:
            seen_optimizer_types[optimizer_cls] += 1
        elif lr_scheduler_config is None:
            seen_optimizer_types[optimizer_cls] += 1

        # Multiple param groups for the same optimizer
        param_groups = optimizer.param_groups
        duplicates = self._duplicate_param_group_names(param_groups)
        if duplicates:
            raise MisconfigurationException(
                "A single `Optimizer` cannot have multiple parameter groups with identical "
                f"`name` values. {name} has duplicated parameter group names {duplicates}"
            )

        name = self._add_prefix(name, optimizer_cls, seen_optimizer_types)
        name_list = [self._add_suffix(name, param_groups, i) for i in range(len(param_groups))]

        if add_lr_sch_names:
            self._lr_sch_names.append(name)

        return name_list

    @property
    def lr_sch_names(self) -> List[str]:
        # TODO remove `lr_sch_names` and `add_lr_sch_names` argument in v1.7.0
        rank_zero_deprecation(
            "`LearningRateMonitor.lr_sch_names` has been deprecated in v1.5 and will be removed in 1.7."
            " Consider accessing them using `LearningRateMonitor.lrs.keys()` which will return"
            " the names of all the optimizers, even those without a scheduler."
        )
        return self._lr_sch_names
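A minimal usage sketch (not part of the vendored file; `LitModel` and the scheduler choice are hypothetical): giving the scheduler a `name` controls the metric key that `LearningRateMonitor` logs and stores in its `lrs` dict:

import torch
from torch import nn
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import LearningRateMonitor


class LitModel(pl.LightningModule):
    # hypothetical module; training_step and dataloaders elided
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(32, 2)

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=0.1, momentum=0.9)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
        # without 'name' the logged key would default to "lr-SGD"
        return [optimizer], [{"scheduler": scheduler, "interval": "epoch", "name": "backbone-lr"}]


lr_monitor = LearningRateMonitor(logging_interval="epoch", log_momentum=True)
trainer = Trainer(callbacks=[lr_monitor])
# after trainer.fit(...), lr_monitor.lrs maps each name to the values recorded per interval,
# e.g. {"backbone-lr": [0.1, 0.1, ...]}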
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py
ADDED
@@ -0,0 +1,720 @@
| 1 |
+
# Copyright The PyTorch Lightning team.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
"""
|
| 15 |
+
Model Checkpointing
|
| 16 |
+
===================
|
| 17 |
+
|
| 18 |
+
Automatically save model checkpoints during training.
|
| 19 |
+
|
| 20 |
+
"""
|
| 21 |
+
import logging
|
| 22 |
+
import os
|
| 23 |
+
import re
|
| 24 |
+
import time
|
| 25 |
+
import warnings
|
| 26 |
+
from copy import deepcopy
|
| 27 |
+
from datetime import timedelta
|
| 28 |
+
from typing import Any, Dict, Optional
|
| 29 |
+
from weakref import proxy
|
| 30 |
+
|
| 31 |
+
import numpy as np
|
| 32 |
+
import torch
|
| 33 |
+
import yaml
|
| 34 |
+
|
| 35 |
+
import pytorch_lightning as pl
|
| 36 |
+
from pytorch_lightning.callbacks.base import Callback
|
| 37 |
+
from pytorch_lightning.utilities.cloud_io import get_filesystem
|
| 38 |
+
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
| 39 |
+
from pytorch_lightning.utilities.logger import _name, _version
|
| 40 |
+
from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info, rank_zero_warn
|
| 41 |
+
from pytorch_lightning.utilities.types import _METRIC, _PATH, STEP_OUTPUT
|
| 42 |
+
from pytorch_lightning.utilities.warnings import WarningCache
|
| 43 |
+
|
| 44 |
+
log = logging.getLogger(__name__)
|
| 45 |
+
warning_cache = WarningCache()
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class ModelCheckpoint(Callback):
|
| 49 |
+
r"""
|
| 50 |
+
Save the model periodically by monitoring a quantity. Every metric logged with
|
| 51 |
+
:meth:`~pytorch_lightning.core.lightning.log` or :meth:`~pytorch_lightning.core.lightning.log_dict` in
|
| 52 |
+
LightningModule is a candidate for the monitor key. For more information, see
|
| 53 |
+
:ref:`checkpointing`.
|
| 54 |
+
|
| 55 |
+
After training finishes, use :attr:`best_model_path` to retrieve the path to the
|
| 56 |
+
best checkpoint file and :attr:`best_model_score` to retrieve its score.
|
| 57 |
+
|
| 58 |
+
Args:
|
| 59 |
+
dirpath: directory to save the model file.
|
| 60 |
+
|
| 61 |
+
Example::
|
| 62 |
+
|
| 63 |
+
# custom path
|
| 64 |
+
# saves a file like: my/path/epoch=0-step=10.ckpt
|
| 65 |
+
>>> checkpoint_callback = ModelCheckpoint(dirpath='my/path/')
|
| 66 |
+
|
| 67 |
+
By default, dirpath is ``None`` and will be set at runtime to the location
|
| 68 |
+
specified by :class:`~pytorch_lightning.trainer.trainer.Trainer`'s
|
| 69 |
+
:paramref:`~pytorch_lightning.trainer.trainer.Trainer.default_root_dir` or
|
| 70 |
+
:paramref:`~pytorch_lightning.trainer.trainer.Trainer.weights_save_path` arguments,
|
| 71 |
+
and if the Trainer uses a logger, the path will also contain logger name and version.
|
| 72 |
+
|
| 73 |
+
filename: checkpoint filename. Can contain named formatting options to be auto-filled.
|
| 74 |
+
|
| 75 |
+
Example::
|
| 76 |
+
|
| 77 |
+
# save any arbitrary metrics like `val_loss`, etc. in name
|
| 78 |
+
# saves a file like: my/path/epoch=2-val_loss=0.02-other_metric=0.03.ckpt
|
| 79 |
+
>>> checkpoint_callback = ModelCheckpoint(
|
| 80 |
+
... dirpath='my/path',
|
| 81 |
+
... filename='{epoch}-{val_loss:.2f}-{other_metric:.2f}'
|
| 82 |
+
... )
|
| 83 |
+
|
| 84 |
+
By default, filename is ``None`` and will be set to ``'{epoch}-{step}'``.
|
| 85 |
+
monitor: quantity to monitor. By default it is ``None`` which saves a checkpoint only for the last epoch.
|
| 86 |
+
verbose: verbosity mode. Default: ``False``.
|
| 87 |
+
save_last: When ``True``, saves an exact copy of the checkpoint to a file `last.ckpt` whenever a checkpoint
|
| 88 |
+
file gets saved. This allows accessing the latest checkpoint in a deterministic manner. Default: ``None``.
|
| 89 |
+
save_top_k: if ``save_top_k == k``,
|
| 90 |
+
the best k models according to
|
| 91 |
+
the quantity monitored will be saved.
|
| 92 |
+
if ``save_top_k == 0``, no models are saved.
|
| 93 |
+
if ``save_top_k == -1``, all models are saved.
|
| 94 |
+
Please note that the monitors are checked every ``every_n_epochs`` epochs.
|
| 95 |
+
if ``save_top_k >= 2`` and the callback is called multiple
|
| 96 |
+
times inside an epoch, the name of the saved file will be
|
| 97 |
+
appended with a version count starting with ``v1``.
|
| 98 |
+
mode: one of {min, max}.
|
| 99 |
+
If ``save_top_k != 0``, the decision to overwrite the current save file is made
|
| 100 |
+
based on either the maximization or the minimization of the monitored quantity.
|
| 101 |
+
For ``'val_acc'``, this should be ``'max'``, for ``'val_loss'`` this should be ``'min'``, etc.
|
| 102 |
+
auto_insert_metric_name: When ``True``, the checkpoints filenames will contain the metric name.
|
| 103 |
+
For example, ``filename='checkpoint_{epoch:02d}-{acc:02.0f}`` with epoch ``1`` and acc ``1.12`` will resolve
|
| 104 |
+
to ``checkpoint_epoch=01-acc=01.ckpt``. Is useful to set it to ``False`` when metric names contain ``/``
|
| 105 |
+
as this will result in extra folders.
|
| 106 |
+
save_weights_only: if ``True``, then only the model's weights will be
|
| 107 |
+
saved. Otherwise, the optimizer states, lr-scheduler states, etc are added in the checkpoint too.
|
| 108 |
+
every_n_train_steps: Number of training steps between checkpoints.
|
| 109 |
+
If ``every_n_train_steps == None or every_n_train_steps == 0``, we skip saving during training.
|
| 110 |
+
To disable, set ``every_n_train_steps = 0``. This value must be ``None`` or non-negative.
|
| 111 |
+
This must be mutually exclusive with ``train_time_interval`` and ``every_n_epochs``.
|
| 112 |
+
train_time_interval: Checkpoints are monitored at the specified time interval.
|
| 113 |
+
For all practical purposes, this cannot be smaller than the amount
|
| 114 |
+
of time it takes to process a single training batch. This is not
|
| 115 |
+
guaranteed to execute at the exact time specified, but should be close.
|
| 116 |
+
This must be mutually exclusive with ``every_n_train_steps`` and ``every_n_epochs``.
|
| 117 |
+
every_n_epochs: Number of epochs between checkpoints.
|
| 118 |
+
This value must be ``None`` or non-negative.
|
| 119 |
+
To disable saving top-k checkpoints, set ``every_n_epochs = 0``.
|
| 120 |
+
This argument does not impact the saving of ``save_last=True`` checkpoints.
|
| 121 |
+
If all of ``every_n_epochs``, ``every_n_train_steps`` and
|
| 122 |
+
``train_time_interval`` are ``None``, we save a checkpoint at the end of every epoch
|
| 123 |
+
(equivalent to ``every_n_epochs = 1``).
|
| 124 |
+
If ``every_n_epochs == None`` and either ``every_n_train_steps != None`` or ``train_time_interval != None``,
|
| 125 |
+
saving at the end of each epoch is disabled
|
| 126 |
+
(equivalent to ``every_n_epochs = 0``).
|
| 127 |
+
This must be mutually exclusive with ``every_n_train_steps`` and ``train_time_interval``.
|
| 128 |
+
Setting both ``ModelCheckpoint(..., every_n_epochs=V, save_on_train_epoch_end=False)`` and
|
| 129 |
+
``Trainer(max_epochs=N, check_val_every_n_epoch=M)``
|
| 130 |
+
will only save checkpoints at epochs 0 < E <= N
|
| 131 |
+
where both values for ``every_n_epochs`` and ``check_val_every_n_epoch`` evenly divide E.
|
| 132 |
+
save_on_train_epoch_end: Whether to run checkpointing at the end of the training epoch.
|
| 133 |
+
If this is ``False``, then the check runs at the end of the validation.
|
| 134 |
+
|
| 135 |
+
Note:
|
| 136 |
+
For extra customization, ModelCheckpoint includes the following attributes:
|
| 137 |
+
|
| 138 |
+
- ``CHECKPOINT_JOIN_CHAR = "-"``
|
| 139 |
+
- ``CHECKPOINT_NAME_LAST = "last"``
|
| 140 |
+
- ``FILE_EXTENSION = ".ckpt"``
|
| 141 |
+
- ``STARTING_VERSION = 1``
|
| 142 |
+
|
| 143 |
+
For example, you can change the default last checkpoint name by doing
|
| 144 |
+
``checkpoint_callback.CHECKPOINT_NAME_LAST = "{epoch}-last"``
|
| 145 |
+
|
| 146 |
+
If you want to checkpoint every N hours, every M train batches, and/or every K val epochs,
|
| 147 |
+
then you should create multiple ``ModelCheckpoint`` callbacks.
|
| 148 |
+
|
| 149 |
+
If the checkpoint's ``dirpath`` changed from what it was before while resuming the training,
|
| 150 |
+
only ``best_model_path`` will be reloaded and a warning will be issued.
|
| 151 |
+
|
| 152 |
+
Raises:
|
| 153 |
+
MisconfigurationException:
|
| 154 |
+
If ``save_top_k`` is smaller than ``-1``,
|
| 155 |
+
if ``monitor`` is ``None`` and ``save_top_k`` is none of ``None``, ``-1``, and ``0``, or
|
| 156 |
+
if ``mode`` is none of ``"min"`` or ``"max"``.
|
| 157 |
+
ValueError:
|
| 158 |
+
If ``trainer.save_checkpoint`` is ``None``.
|
| 159 |
+
|
| 160 |
+
Example::
|
| 161 |
+
|
| 162 |
+
>>> from pytorch_lightning import Trainer
|
| 163 |
+
>>> from pytorch_lightning.callbacks import ModelCheckpoint
|
| 164 |
+
|
| 165 |
+
# saves checkpoints to 'my/path/' at every epoch
|
| 166 |
+
>>> checkpoint_callback = ModelCheckpoint(dirpath='my/path/')
|
| 167 |
+
>>> trainer = Trainer(callbacks=[checkpoint_callback])
|
| 168 |
+
|
| 169 |
+
# save epoch and val_loss in name
|
| 170 |
+
# saves a file like: my/path/sample-mnist-epoch=02-val_loss=0.32.ckpt
|
| 171 |
+
>>> checkpoint_callback = ModelCheckpoint(
|
| 172 |
+
... monitor='val_loss',
|
| 173 |
+
... dirpath='my/path/',
|
| 174 |
+
... filename='sample-mnist-{epoch:02d}-{val_loss:.2f}'
|
| 175 |
+
... )
|
| 176 |
+
|
| 177 |
+
# save epoch and val_loss in name, but specify the formatting yourself (e.g. to avoid problems with Tensorboard
|
| 178 |
+
# or Neptune, due to the presence of characters like '=' or '/')
|
| 179 |
+
# saves a file like: my/path/sample-mnist-epoch02-val_loss0.32.ckpt
|
| 180 |
+
>>> checkpoint_callback = ModelCheckpoint(
|
| 181 |
+
... monitor='val/loss',
|
| 182 |
+
... dirpath='my/path/',
|
| 183 |
+
... filename='sample-mnist-epoch{epoch:02d}-val_loss{val/loss:.2f}',
|
| 184 |
+
... auto_insert_metric_name=False
|
| 185 |
+
... )
|
| 186 |
+
|
| 187 |
+
# retrieve the best checkpoint after training
|
| 188 |
+
checkpoint_callback = ModelCheckpoint(dirpath='my/path/')
|
| 189 |
+
trainer = Trainer(callbacks=[checkpoint_callback])
|
| 190 |
+
model = ...
|
| 191 |
+
trainer.fit(model)
|
| 192 |
+
checkpoint_callback.best_model_path
|
| 193 |
+
|
| 194 |
+
.. tip:: Saving and restoring multiple checkpoint callbacks at the same time is supported under variation in the
|
| 195 |
+
following arguments:
|
| 196 |
+
|
| 197 |
+
*monitor, mode, every_n_train_steps, every_n_epochs, train_time_interval, save_on_train_epoch_end*
|
| 198 |
+
|
| 199 |
+
Read more: :ref:`Persisting Callback State`
|
| 200 |
+
"""
|
| 201 |
+
|
| 202 |
+
CHECKPOINT_JOIN_CHAR = "-"
|
| 203 |
+
CHECKPOINT_NAME_LAST = "last"
|
| 204 |
+
FILE_EXTENSION = ".ckpt"
|
| 205 |
+
STARTING_VERSION = 1
|
| 206 |
+
|
| 207 |
+
def __init__(
|
| 208 |
+
self,
|
| 209 |
+
dirpath: Optional[_PATH] = None,
|
| 210 |
+
filename: Optional[str] = None,
|
| 211 |
+
monitor: Optional[str] = None,
|
| 212 |
+
verbose: bool = False,
|
| 213 |
+
save_last: Optional[bool] = None,
|
| 214 |
+
save_top_k: int = 1,
|
| 215 |
+
save_weights_only: bool = False,
|
| 216 |
+
mode: str = "min",
|
| 217 |
+
auto_insert_metric_name: bool = True,
|
| 218 |
+
every_n_train_steps: Optional[int] = None,
|
| 219 |
+
train_time_interval: Optional[timedelta] = None,
|
| 220 |
+
every_n_epochs: Optional[int] = None,
|
| 221 |
+
save_on_train_epoch_end: Optional[bool] = None,
|
| 222 |
+
):
|
| 223 |
+
super().__init__()
|
| 224 |
+
self.monitor = monitor
|
| 225 |
+
self.verbose = verbose
|
| 226 |
+
self.save_last = save_last
|
| 227 |
+
self.save_top_k = save_top_k
|
| 228 |
+
self.save_weights_only = save_weights_only
|
| 229 |
+
self.auto_insert_metric_name = auto_insert_metric_name
|
| 230 |
+
self._save_on_train_epoch_end = save_on_train_epoch_end
|
| 231 |
+
self._last_global_step_saved = 0 # no need to save when no steps were taken
|
| 232 |
+
self._last_time_checked: Optional[float] = None
|
| 233 |
+
self.current_score = None
|
| 234 |
+
self.best_k_models = {}
|
| 235 |
+
self.kth_best_model_path = ""
|
| 236 |
+
self.best_model_score = None
|
| 237 |
+
self.best_model_path = ""
|
| 238 |
+
self.last_model_path = ""
|
| 239 |
+
|
| 240 |
+
self.__init_monitor_mode(mode)
|
| 241 |
+
self.__init_ckpt_dir(dirpath, filename)
|
| 242 |
+
self.__init_triggers(every_n_train_steps, every_n_epochs, train_time_interval)
|
| 243 |
+
self.__validate_init_configuration()
|
| 244 |
+
|
| 245 |
+
@property
|
| 246 |
+
def state_key(self) -> str:
|
| 247 |
+
return self._generate_state_key(
|
| 248 |
+
monitor=self.monitor,
|
| 249 |
+
mode=self.mode,
|
| 250 |
+
every_n_train_steps=self._every_n_train_steps,
|
| 251 |
+
every_n_epochs=self._every_n_epochs,
|
| 252 |
+
train_time_interval=self._train_time_interval,
|
| 253 |
+
save_on_train_epoch_end=self._save_on_train_epoch_end,
|
| 254 |
+
)
|
| 255 |
+
|
| 256 |
+
def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
|
| 257 |
+
self.__resolve_ckpt_dir(trainer)
|
| 258 |
+
if trainer.is_global_zero and stage == "fit":
|
| 259 |
+
self.__warn_if_dir_not_empty(self.dirpath)
|
| 260 |
+
|
| 261 |
+
# NOTE: setting these attributes needs to happen as early as possible BEFORE reloading callback states,
|
| 262 |
+
# because the attributes are part of the state_key which needs to be fully defined before reloading.
|
| 263 |
+
if self._save_on_train_epoch_end is None:
|
| 264 |
+
# if the user runs validation multiple times per training epoch or multiple training epochs without
|
| 265 |
+
# validation, then we run after validation instead of on train epoch end
|
| 266 |
+
self._save_on_train_epoch_end = trainer.val_check_interval == 1.0 and trainer.check_val_every_n_epoch == 1
|
| 267 |
+
|
| 268 |
+
def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 269 |
+
self._last_time_checked = time.monotonic()
|
| 270 |
+
|
| 271 |
+
def on_train_batch_end(
|
| 272 |
+
self,
|
| 273 |
+
trainer: "pl.Trainer",
|
| 274 |
+
pl_module: "pl.LightningModule",
|
| 275 |
+
outputs: STEP_OUTPUT,
|
| 276 |
+
batch: Any,
|
| 277 |
+
batch_idx: int,
|
| 278 |
+
) -> None:
|
| 279 |
+
"""Save checkpoint on train batch end if we meet the criteria for `every_n_train_steps`"""
|
| 280 |
+
if self._should_skip_saving_checkpoint(trainer):
|
| 281 |
+
return
|
| 282 |
+
skip_batch = self._every_n_train_steps < 1 or (trainer.global_step % self._every_n_train_steps != 0)
|
| 283 |
+
|
| 284 |
+
train_time_interval = self._train_time_interval
|
| 285 |
+
skip_time = True
|
| 286 |
+
now = time.monotonic()
|
| 287 |
+
if train_time_interval:
|
| 288 |
+
prev_time_check = self._last_time_checked
|
| 289 |
+
skip_time = prev_time_check is None or (now - prev_time_check) < train_time_interval.total_seconds()
|
| 290 |
+
# in case we have time differences across ranks
|
| 291 |
+
# broadcast the decision on whether to checkpoint from rank 0 to avoid possible hangs
|
| 292 |
+
skip_time = trainer.strategy.broadcast(skip_time)
|
| 293 |
+
|
| 294 |
+
if skip_batch and skip_time:
|
| 295 |
+
return
|
| 296 |
+
if not skip_time:
|
| 297 |
+
self._last_time_checked = now
|
| 298 |
+
|
| 299 |
+
monitor_candidates = self._monitor_candidates(trainer)
|
| 300 |
+
self._save_topk_checkpoint(trainer, monitor_candidates)
|
| 301 |
+
self._save_last_checkpoint(trainer, monitor_candidates)
|
| 302 |
+
|
| 303 |
+
def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 304 |
+
"""Save a checkpoint at the end of the training epoch."""
|
| 305 |
+
if not self._should_skip_saving_checkpoint(trainer) and self._save_on_train_epoch_end:
|
| 306 |
+
monitor_candidates = self._monitor_candidates(trainer)
|
| 307 |
+
if self._every_n_epochs >= 1 and (trainer.current_epoch + 1) % self._every_n_epochs == 0:
|
| 308 |
+
self._save_topk_checkpoint(trainer, monitor_candidates)
|
| 309 |
+
self._save_last_checkpoint(trainer, monitor_candidates)
|
| 310 |
+
|
| 311 |
+
def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 312 |
+
"""Save a checkpoint at the end of the validation stage."""
|
| 313 |
+
if not self._should_skip_saving_checkpoint(trainer) and not self._save_on_train_epoch_end:
|
| 314 |
+
monitor_candidates = self._monitor_candidates(trainer)
|
| 315 |
+
if self._every_n_epochs >= 1 and (trainer.current_epoch + 1) % self._every_n_epochs == 0:
|
| 316 |
+
self._save_topk_checkpoint(trainer, monitor_candidates)
|
| 317 |
+
self._save_last_checkpoint(trainer, monitor_candidates)
|
| 318 |
+
|
| 319 |
+
def state_dict(self) -> Dict[str, Any]:
|
| 320 |
+
return {
|
| 321 |
+
"monitor": self.monitor,
|
| 322 |
+
"best_model_score": self.best_model_score,
|
| 323 |
+
"best_model_path": self.best_model_path,
|
| 324 |
+
"current_score": self.current_score,
|
| 325 |
+
"dirpath": self.dirpath,
|
| 326 |
+
"best_k_models": self.best_k_models,
|
| 327 |
+
"kth_best_model_path": self.kth_best_model_path,
|
| 328 |
+
"kth_value": self.kth_value,
|
| 329 |
+
"last_model_path": self.last_model_path,
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
|
| 333 |
+
dirpath_from_ckpt = state_dict.get("dirpath", self.dirpath)
|
| 334 |
+
|
| 335 |
+
if self.dirpath == dirpath_from_ckpt:
|
| 336 |
+
self.best_model_score = state_dict["best_model_score"]
|
| 337 |
+
self.kth_best_model_path = state_dict.get("kth_best_model_path", self.kth_best_model_path)
|
| 338 |
+
self.kth_value = state_dict.get("kth_value", self.kth_value)
|
| 339 |
+
self.best_k_models = state_dict.get("best_k_models", self.best_k_models)
|
| 340 |
+
self.last_model_path = state_dict.get("last_model_path", self.last_model_path)
|
| 341 |
+
else:
|
| 342 |
+
warnings.warn(
|
| 343 |
+
f"The dirpath has changed from {dirpath_from_ckpt!r} to {self.dirpath!r},"
|
| 344 |
+
" therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and"
|
| 345 |
+
" `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded."
|
| 346 |
+
)
|
| 347 |
+
|
| 348 |
+
self.best_model_path = state_dict["best_model_path"]
|
| 349 |
+
|
| 350 |
+
def save_checkpoint(self, trainer: "pl.Trainer") -> None: # pragma: no-cover
|
| 351 |
+
"""Performs the main logic around saving a checkpoint.
|
| 352 |
+
|
| 353 |
+
This method runs on all ranks. It is the responsibility of `trainer.save_checkpoint` to correctly handle the
|
| 354 |
+
behaviour in distributed training, i.e., saving only on rank 0 for data parallel use cases.
|
| 355 |
+
"""
|
| 356 |
+
rank_zero_deprecation(
|
| 357 |
+
f"`{self.__class__.__name__}.save_checkpoint()` was deprecated in v1.6 and will be removed in v1.8."
|
| 358 |
+
" Instead, you can use `trainer.save_checkpoint()` to manually save a checkpoint."
|
| 359 |
+
)
|
| 360 |
+
monitor_candidates = self._monitor_candidates(trainer)
|
| 361 |
+
self._save_topk_checkpoint(trainer, monitor_candidates)
|
| 362 |
+
self._save_last_checkpoint(trainer, monitor_candidates)
|
| 363 |
+
|
| 364 |
+
def _save_topk_checkpoint(self, trainer: "pl.Trainer", monitor_candidates: Dict[str, _METRIC]) -> None:
|
| 365 |
+
if self.save_top_k == 0:
|
| 366 |
+
return
|
| 367 |
+
|
| 368 |
+
# validate metric
|
| 369 |
+
if self.monitor is not None:
|
| 370 |
+
if self.monitor not in monitor_candidates:
|
| 371 |
+
m = (
|
| 372 |
+
f"`ModelCheckpoint(monitor={self.monitor!r})` could not find the monitored key in the returned"
|
| 373 |
+
f" metrics: {list(monitor_candidates)}."
|
| 374 |
+
f" HINT: Did you call `log({self.monitor!r}, value)` in the `LightningModule`?"
|
| 375 |
+
)
|
| 376 |
+
if trainer.fit_loop.epoch_loop.val_loop._has_run:
|
| 377 |
+
raise MisconfigurationException(m)
|
| 378 |
+
warning_cache.warn(m)
|
| 379 |
+
self._save_monitor_checkpoint(trainer, monitor_candidates)
|
| 380 |
+
else:
|
| 381 |
+
self._save_none_monitor_checkpoint(trainer, monitor_candidates)
|
| 382 |
+
|
| 383 |
+
def _save_checkpoint(self, trainer: "pl.Trainer", filepath: str) -> None:
|
| 384 |
+
trainer.save_checkpoint(filepath, self.save_weights_only)
|
| 385 |
+
|
| 386 |
+
self._last_global_step_saved = trainer.global_step
|
| 387 |
+
|
| 388 |
+
# notify loggers
|
| 389 |
+
if trainer.is_global_zero:
|
| 390 |
+
for logger in trainer.loggers:
|
| 391 |
+
logger.after_save_checkpoint(proxy(self))
|
| 392 |
+
|
| 393 |
+
def _should_skip_saving_checkpoint(self, trainer: "pl.Trainer") -> bool:
|
| 394 |
+
from pytorch_lightning.trainer.states import TrainerFn
|
| 395 |
+
|
| 396 |
+
return (
|
| 397 |
+
trainer.fast_dev_run # disable checkpointing with fast_dev_run
|
| 398 |
+
or trainer.state.fn != TrainerFn.FITTING # don't save anything during non-fit
|
| 399 |
+
or trainer.sanity_checking # don't save anything during sanity check
|
| 400 |
+
or self._last_global_step_saved == trainer.global_step # already saved at the last step
|
| 401 |
+
)
|
| 402 |
+
|
| 403 |
+
def __validate_init_configuration(self) -> None:
|
| 404 |
+
if self.save_top_k < -1:
|
| 405 |
+
raise MisconfigurationException(f"Invalid value for save_top_k={self.save_top_k}. Must be >= -1")
|
| 406 |
+
if self._every_n_train_steps < 0:
|
| 407 |
+
raise MisconfigurationException(
|
| 408 |
+
f"Invalid value for every_n_train_steps={self._every_n_train_steps}. Must be >= 0"
|
| 409 |
+
)
|
| 410 |
+
if self._every_n_epochs < 0:
|
| 411 |
+
raise MisconfigurationException(f"Invalid value for every_n_epochs={self._every_n_epochs}. Must be >= 0")
|
| 412 |
+
|
| 413 |
+
every_n_train_steps_triggered = self._every_n_train_steps >= 1
|
| 414 |
+
every_n_epochs_triggered = self._every_n_epochs >= 1
|
| 415 |
+
train_time_interval_triggered = self._train_time_interval is not None
|
| 416 |
+
if every_n_train_steps_triggered + every_n_epochs_triggered + train_time_interval_triggered > 1:
|
| 417 |
+
raise MisconfigurationException(
|
| 418 |
+
f"Combination of parameters every_n_train_steps={self._every_n_train_steps}, "
|
| 419 |
+
f"every_n_epochs={self._every_n_epochs} and train_time_interval={self._train_time_interval} "
|
| 420 |
+
"should be mutually exclusive."
|
| 421 |
+
)
|
| 422 |
+
|
| 423 |
+
if self.monitor is None:
|
| 424 |
+
# -1: save all epochs, 0: nothing is saved, 1: save last epoch
|
| 425 |
+
if self.save_top_k not in (-1, 0, 1):
|
| 426 |
+
raise MisconfigurationException(
|
| 427 |
+
f"ModelCheckpoint(save_top_k={self.save_top_k}, monitor=None) is not a valid"
|
| 428 |
+
" configuration. No quantity for top_k to track."
|
| 429 |
+
)
|
| 430 |
+
|
| 431 |
+
if self.save_top_k == -1 and self.save_last:
|
| 432 |
+
rank_zero_info(
|
| 433 |
+
"ModelCheckpoint(save_last=True, save_top_k=-1, monitor=None)"
|
| 434 |
+
" will duplicate the last checkpoint saved."
|
| 435 |
+
)
|
| 436 |
+
|
| 437 |
+
def __init_ckpt_dir(self, dirpath: Optional[_PATH], filename: Optional[str]) -> None:
|
| 438 |
+
self._fs = get_filesystem(dirpath if dirpath else "")
|
| 439 |
+
|
| 440 |
+
if dirpath and self._fs.protocol == "file":
|
| 441 |
+
dirpath = os.path.realpath(dirpath)
|
| 442 |
+
|
| 443 |
+
self.dirpath = dirpath
|
| 444 |
+
self.filename = filename
|
| 445 |
+
|
| 446 |
+
def __init_monitor_mode(self, mode: str) -> None:
|
| 447 |
+
torch_inf = torch.tensor(np.Inf)
|
| 448 |
+
mode_dict = {"min": (torch_inf, "min"), "max": (-torch_inf, "max")}
|
| 449 |
+
|
| 450 |
+
if mode not in mode_dict:
|
| 451 |
+
raise MisconfigurationException(f"`mode` can be {', '.join(mode_dict.keys())} but got {mode}")
|
| 452 |
+
|
| 453 |
+
self.kth_value, self.mode = mode_dict[mode]
|
| 454 |
+
|
| 455 |
+
def __init_triggers(
|
| 456 |
+
self,
|
| 457 |
+
every_n_train_steps: Optional[int],
|
| 458 |
+
every_n_epochs: Optional[int],
|
| 459 |
+
train_time_interval: Optional[timedelta],
|
| 460 |
+
) -> None:
|
| 461 |
+
|
| 462 |
+
# Default to running once after each validation epoch if neither
|
| 463 |
+
# every_n_train_steps nor every_n_epochs is set
|
| 464 |
+
if every_n_train_steps is None and every_n_epochs is None and train_time_interval is None:
|
| 465 |
+
every_n_epochs = 1
|
| 466 |
+
every_n_train_steps = 0
|
| 467 |
+
log.debug("Both every_n_train_steps and every_n_epochs are not set. Setting every_n_epochs=1")
|
| 468 |
+
else:
|
| 469 |
+
every_n_epochs = every_n_epochs or 0
|
| 470 |
+
every_n_train_steps = every_n_train_steps or 0
|
| 471 |
+
|
| 472 |
+
self._train_time_interval: Optional[timedelta] = train_time_interval
|
| 473 |
+
self._every_n_epochs: int = every_n_epochs
|
| 474 |
+
self._every_n_train_steps: int = every_n_train_steps
|
| 475 |
+
|
| 476 |
+
@property
|
| 477 |
+
def every_n_epochs(self) -> Optional[int]:
|
| 478 |
+
return self._every_n_epochs
|
| 479 |
+
|
| 480 |
+
def check_monitor_top_k(self, trainer: "pl.Trainer", current: Optional[torch.Tensor] = None) -> bool:
|
| 481 |
+
if current is None:
|
| 482 |
+
return False
|
| 483 |
+
|
| 484 |
+
if self.save_top_k == -1:
|
| 485 |
+
return True
|
| 486 |
+
|
| 487 |
+
less_than_k_models = len(self.best_k_models) < self.save_top_k
|
| 488 |
+
if less_than_k_models:
|
| 489 |
+
return True
|
| 490 |
+
|
| 491 |
+
monitor_op = {"min": torch.lt, "max": torch.gt}[self.mode]
|
| 492 |
+
should_update_best_and_save = monitor_op(current, self.best_k_models[self.kth_best_model_path])
|
| 493 |
+
|
| 494 |
+
# If using multiple devices, make sure all processes are unanimous on the decision.
|
| 495 |
+
should_update_best_and_save = trainer.strategy.reduce_boolean_decision(should_update_best_and_save)
|
| 496 |
+
|
| 497 |
+
return should_update_best_and_save
|
| 498 |
+
|
| 499 |
+
@classmethod
|
| 500 |
+
def _format_checkpoint_name(
|
| 501 |
+
cls,
|
| 502 |
+
filename: Optional[str],
|
| 503 |
+
metrics: Dict[str, _METRIC],
|
| 504 |
+
prefix: str = "",
|
| 505 |
+
auto_insert_metric_name: bool = True,
|
| 506 |
+
) -> str:
|
| 507 |
+
if not filename:
|
| 508 |
+
# filename is not set, use default name
|
| 509 |
+
filename = "{epoch}" + cls.CHECKPOINT_JOIN_CHAR + "{step}"
|
| 510 |
+
|
| 511 |
+
# check and parse user passed keys in the string
|
| 512 |
+
groups = re.findall(r"(\{.*?)[:\}]", filename)
|
| 513 |
+
if len(groups) >= 0:
|
| 514 |
+
for group in groups:
|
| 515 |
+
name = group[1:]
|
| 516 |
+
|
| 517 |
+
if auto_insert_metric_name:
|
| 518 |
+
filename = filename.replace(group, name + "={" + name)
|
| 519 |
+
|
| 520 |
+
# support for dots: https://stackoverflow.com/a/7934969
|
| 521 |
+
filename = filename.replace(group, f"{{0[{name}]")
|
| 522 |
+
|
| 523 |
+
if name not in metrics:
|
| 524 |
+
metrics[name] = 0
|
| 525 |
+
filename = filename.format(metrics)
|
| 526 |
+
|
| 527 |
+
if prefix:
|
| 528 |
+
filename = cls.CHECKPOINT_JOIN_CHAR.join([prefix, filename])
|
| 529 |
+
|
| 530 |
+
return filename
|
| 531 |
+
|
| 532 |
+
def format_checkpoint_name(
|
| 533 |
+
self, metrics: Dict[str, _METRIC], filename: Optional[str] = None, ver: Optional[int] = None
|
| 534 |
+
) -> str:
|
| 535 |
+
"""Generate a filename according to the defined template.
|
| 536 |
+
|
| 537 |
+
Example::
|
| 538 |
+
|
| 539 |
+
>>> tmpdir = os.path.dirname(__file__)
|
| 540 |
+
>>> ckpt = ModelCheckpoint(dirpath=tmpdir, filename='{epoch}')
|
| 541 |
+
>>> os.path.basename(ckpt.format_checkpoint_name(dict(epoch=0)))
|
| 542 |
+
'epoch=0.ckpt'
|
| 543 |
+
>>> ckpt = ModelCheckpoint(dirpath=tmpdir, filename='{epoch:03d}')
|
| 544 |
+
>>> os.path.basename(ckpt.format_checkpoint_name(dict(epoch=5)))
|
| 545 |
+
'epoch=005.ckpt'
|
| 546 |
+
>>> ckpt = ModelCheckpoint(dirpath=tmpdir, filename='{epoch}-{val_loss:.2f}')
|
| 547 |
+
>>> os.path.basename(ckpt.format_checkpoint_name(dict(epoch=2, val_loss=0.123456)))
|
| 548 |
+
'epoch=2-val_loss=0.12.ckpt'
|
| 549 |
+
>>> os.path.basename(ckpt.format_checkpoint_name(dict(epoch=2, val_loss=0.12), filename='{epoch:d}'))
|
| 550 |
+
'epoch=2.ckpt'
|
| 551 |
+
>>> ckpt = ModelCheckpoint(dirpath=tmpdir,
|
| 552 |
+
... filename='epoch={epoch}-validation_loss={val_loss:.2f}',
|
| 553 |
+
... auto_insert_metric_name=False)
|
| 554 |
+
>>> os.path.basename(ckpt.format_checkpoint_name(dict(epoch=2, val_loss=0.123456)))
|
| 555 |
+
'epoch=2-validation_loss=0.12.ckpt'
|
| 556 |
+
>>> ckpt = ModelCheckpoint(dirpath=tmpdir, filename='{missing:d}')
|
| 557 |
+
>>> os.path.basename(ckpt.format_checkpoint_name({}))
|
| 558 |
+
'missing=0.ckpt'
|
| 559 |
+
>>> ckpt = ModelCheckpoint(filename='{step}')
|
| 560 |
+
>>> os.path.basename(ckpt.format_checkpoint_name(dict(step=0)))
|
| 561 |
+
'step=0.ckpt'
|
| 562 |
+
"""
|
| 563 |
+
filename = filename or self.filename
|
| 564 |
+
filename = self._format_checkpoint_name(filename, metrics, auto_insert_metric_name=self.auto_insert_metric_name)
|
| 565 |
+
|
| 566 |
+
if ver is not None:
|
| 567 |
+
filename = self.CHECKPOINT_JOIN_CHAR.join((filename, f"v{ver}"))
|
| 568 |
+
|
| 569 |
+
ckpt_name = f"{filename}{self.FILE_EXTENSION}"
|
| 570 |
+
return os.path.join(self.dirpath, ckpt_name) if self.dirpath else ckpt_name
|
| 571 |
+
|
| 572 |
+
def __resolve_ckpt_dir(self, trainer: "pl.Trainer") -> None:
|
| 573 |
+
"""Determines model checkpoint save directory at runtime. References attributes from the trainer's logger
|
| 574 |
+
to determine where to save checkpoints. The base path for saving weights is set in this priority:
|
| 575 |
+
|
| 576 |
+
1. Checkpoint callback's path (if passed in)
|
| 577 |
+
2. The default_root_dir from trainer if trainer has no logger
|
| 578 |
+
3. The weights_save_path from trainer, if user provides it (deprecated)
|
| 579 |
+
4. User provided weights_saved_path
|
| 580 |
+
|
| 581 |
+
The base path gets extended with logger name and version (if these are available)
|
| 582 |
+
and subfolder "checkpoints".
|
| 583 |
+
"""
|
| 584 |
+
if self.dirpath is not None:
|
| 585 |
+
return # short circuit
|
| 586 |
+
|
| 587 |
+
# TODO: Remove weights_save_path logic here in v1.8
|
| 588 |
+
if trainer.loggers:
|
| 589 |
+
if trainer._weights_save_path_internal != trainer.default_root_dir:
|
| 590 |
+
# the user has changed weights_save_path, it overrides anything
|
| 591 |
+
save_dir = trainer._weights_save_path_internal
|
| 592 |
+
elif len(trainer.loggers) == 1:
|
| 593 |
+
save_dir = trainer.logger.save_dir or trainer.default_root_dir
|
| 594 |
+
else:
|
| 595 |
+
save_dir = trainer.default_root_dir
|
| 596 |
+
|
| 597 |
+
name = _name(trainer.loggers)
|
| 598 |
+
version = _version(trainer.loggers)
|
| 599 |
+
version = version if isinstance(version, str) else f"version_{version}"
|
| 600 |
+
|
| 601 |
+
ckpt_path = os.path.join(save_dir, str(name), version, "checkpoints")
|
| 602 |
+
else:
|
| 603 |
+
ckpt_path = os.path.join(trainer._weights_save_path_internal, "checkpoints")
|
| 604 |
+
|
| 605 |
+
ckpt_path = trainer.strategy.broadcast(ckpt_path)
|
| 606 |
+
|
| 607 |
+
self.dirpath = ckpt_path
|
| 608 |
+
|
| 609 |
+
def __warn_if_dir_not_empty(self, dirpath: _PATH) -> None:
|
| 610 |
+
if self.save_top_k != 0 and self._fs.isdir(dirpath) and len(self._fs.ls(dirpath)) > 0:
|
| 611 |
+
rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
|
| 612 |
+
|
| 613 |
+
def _get_metric_interpolated_filepath_name(
|
| 614 |
+
self, monitor_candidates: Dict[str, _METRIC], trainer: "pl.Trainer", del_filepath: Optional[str] = None
|
| 615 |
+
) -> str:
|
| 616 |
+
filepath = self.format_checkpoint_name(monitor_candidates)
|
| 617 |
+
|
| 618 |
+
version_cnt = self.STARTING_VERSION
|
| 619 |
+
while self.file_exists(filepath, trainer) and filepath != del_filepath:
|
| 620 |
+
filepath = self.format_checkpoint_name(monitor_candidates, ver=version_cnt)
|
| 621 |
+
version_cnt += 1
|
| 622 |
+
|
| 623 |
+
return filepath
|
| 624 |
+
|
| 625 |
+
def _monitor_candidates(self, trainer: "pl.Trainer") -> Dict[str, _METRIC]:
|
| 626 |
+
monitor_candidates = deepcopy(trainer.callback_metrics)
|
| 627 |
+
# cast to int if necessary because `self.log("epoch", 123)` will convert it to float. if it's not a tensor
|
| 628 |
+
# or does not exist we overwrite it as it's likely an error
|
| 629 |
+
epoch = monitor_candidates.get("epoch")
|
| 630 |
+
monitor_candidates["epoch"] = (
|
| 631 |
+
epoch.int() if isinstance(epoch, torch.Tensor) else torch.tensor(trainer.current_epoch)
|
| 632 |
+
)
|
| 633 |
+
step = monitor_candidates.get("step")
|
| 634 |
+
monitor_candidates["step"] = step.int() if isinstance(step, torch.Tensor) else torch.tensor(trainer.global_step)
|
| 635 |
+
return monitor_candidates
|
| 636 |
+
|
| 637 |
+
def _save_last_checkpoint(self, trainer: "pl.Trainer", monitor_candidates: Dict[str, _METRIC]) -> None:
|
| 638 |
+
if not self.save_last:
|
| 639 |
+
return
|
| 640 |
+
|
| 641 |
+
filepath = self.format_checkpoint_name(monitor_candidates, self.CHECKPOINT_NAME_LAST)
|
| 642 |
+
# set the last model path before saving because it will be part of the state.
|
| 643 |
+
previous, self.last_model_path = self.last_model_path, filepath
|
| 644 |
+
self._save_checkpoint(trainer, filepath)
|
| 645 |
+
if previous and previous != filepath:
|
| 646 |
+
trainer.strategy.remove_checkpoint(previous)
|
| 647 |
+
|
| 648 |
+
def _save_monitor_checkpoint(self, trainer: "pl.Trainer", monitor_candidates: Dict[str, _METRIC]) -> None:
|
| 649 |
+
current = monitor_candidates.get(self.monitor)
|
| 650 |
+
if self.check_monitor_top_k(trainer, current):
|
| 651 |
+
self._update_best_and_save(current, trainer, monitor_candidates)
|
| 652 |
+
elif self.verbose:
|
| 653 |
+
epoch = monitor_candidates["epoch"]
|
| 654 |
+
step = monitor_candidates["step"]
|
| 655 |
+
rank_zero_info(f"Epoch {epoch:d}, global step {step:d}: {self.monitor!r} was not in top {self.save_top_k}")
|
| 656 |
+
|
| 657 |
+
def _save_none_monitor_checkpoint(self, trainer: "pl.Trainer", monitor_candidates: Dict[str, _METRIC]) -> None:
|
| 658 |
+
filepath = self._get_metric_interpolated_filepath_name(monitor_candidates, trainer)
|
| 659 |
+
# set the best model path before saving because it will be part of the state.
|
| 660 |
+
previous, self.best_model_path = self.best_model_path, filepath
|
| 661 |
+
self._save_checkpoint(trainer, filepath)
|
| 662 |
+
if self.save_top_k == 1 and previous and previous != filepath:
|
| 663 |
+
trainer.strategy.remove_checkpoint(previous)
|
| 664 |
+
|
| 665 |
+
def _update_best_and_save(
|
| 666 |
+
self, current: torch.Tensor, trainer: "pl.Trainer", monitor_candidates: Dict[str, _METRIC]
|
| 667 |
+
) -> None:
|
| 668 |
+
k = len(self.best_k_models) + 1 if self.save_top_k == -1 else self.save_top_k
|
| 669 |
+
|
| 670 |
+
del_filepath = None
|
| 671 |
+
if len(self.best_k_models) == k and k > 0:
|
| 672 |
+
del_filepath = self.kth_best_model_path
|
| 673 |
+
self.best_k_models.pop(del_filepath)
|
| 674 |
+
|
| 675 |
+
# do not save nan, replace with +/- inf
|
| 676 |
+
if isinstance(current, torch.Tensor) and torch.isnan(current):
|
| 677 |
+
current = torch.tensor(float("inf" if self.mode == "min" else "-inf"), device=current.device)
|
| 678 |
+
|
| 679 |
+
filepath = self._get_metric_interpolated_filepath_name(monitor_candidates, trainer, del_filepath)
|
| 680 |
+
|
| 681 |
+
# save the current score
|
| 682 |
+
self.current_score = current
|
| 683 |
+
self.best_k_models[filepath] = current
|
| 684 |
+
|
| 685 |
+
if len(self.best_k_models) == k:
|
| 686 |
+
# monitor dict has reached k elements
|
| 687 |
+
_op = max if self.mode == "min" else min
|
| 688 |
+
self.kth_best_model_path = _op(self.best_k_models, key=self.best_k_models.get)
|
| 689 |
+
self.kth_value = self.best_k_models[self.kth_best_model_path]
|
| 690 |
+
|
| 691 |
+
_op = min if self.mode == "min" else max
|
| 692 |
+
self.best_model_path = _op(self.best_k_models, key=self.best_k_models.get)
|
| 693 |
+
self.best_model_score = self.best_k_models[self.best_model_path]
|
| 694 |
+
|
| 695 |
+
if self.verbose:
|
| 696 |
+
epoch = monitor_candidates["epoch"]
|
| 697 |
+
step = monitor_candidates["step"]
|
| 698 |
+
rank_zero_info(
|
| 699 |
+
f"Epoch {epoch:d}, global step {step:d}: {self.monitor!r} reached {current:0.5f}"
|
| 700 |
+
f" (best {self.best_model_score:0.5f}), saving model to {filepath!r} as top {k}"
|
| 701 |
+
)
|
| 702 |
+
self._save_checkpoint(trainer, filepath)
|
| 703 |
+
|
| 704 |
+
if del_filepath is not None and filepath != del_filepath:
|
| 705 |
+
trainer.strategy.remove_checkpoint(del_filepath)
|
| 706 |
+
|
| 707 |
+
def to_yaml(self, filepath: Optional[_PATH] = None) -> None:
|
| 708 |
+
"""Saves the `best_k_models` dict containing the checkpoint paths with the corresponding scores to a YAML
|
| 709 |
+
file."""
|
| 710 |
+
best_k = {k: v.item() for k, v in self.best_k_models.items()}
|
| 711 |
+
if filepath is None:
|
| 712 |
+
filepath = os.path.join(self.dirpath, "best_k_models.yaml")
|
| 713 |
+
with self._fs.open(filepath, "w") as fp:
|
| 714 |
+
yaml.dump(best_k, fp)
|
| 715 |
+
|
| 716 |
+
def file_exists(self, filepath: _PATH, trainer: "pl.Trainer") -> bool:
|
| 717 |
+
"""Checks if a file exists on rank 0 and broadcasts the result to all other ranks, preventing the internal
|
| 718 |
+
state to diverge between ranks."""
|
| 719 |
+
exists = self._fs.exists(filepath)
|
| 720 |
+
return trainer.strategy.broadcast(exists)
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_summary.py
ADDED
@@ -0,0 +1,73 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Model Summary
=============

Generates a summary of all layers in a :class:`~pytorch_lightning.core.lightning.LightningModule`.

The string representation of this summary prints a table with columns containing
the name, type and number of parameters for each layer.

"""
import logging
from typing import List, Tuple

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.utilities.model_summary import _format_summary_table, summarize

log = logging.getLogger(__name__)


class ModelSummary(Callback):
    r"""
    Generates a summary of all layers in a :class:`~pytorch_lightning.core.lightning.LightningModule`.

    Args:
        max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the
            layer summary off.

    Example::

        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.callbacks import ModelSummary
        >>> trainer = Trainer(callbacks=[ModelSummary(max_depth=1)])
    """

    def __init__(self, max_depth: int = 1) -> None:
        self._max_depth: int = max_depth

    def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if not self._max_depth:
            return None

        model_summary = summarize(pl_module, max_depth=self._max_depth)
        summary_data = model_summary._get_summary_data()
        total_parameters = model_summary.total_parameters
        trainable_parameters = model_summary.trainable_parameters
        model_size = model_summary.model_size

        if trainer.is_global_zero:
            self.summarize(summary_data, total_parameters, trainable_parameters, model_size)

    @staticmethod
    def summarize(
        summary_data: List[Tuple[str, List[str]]],
        total_parameters: int,
        trainable_parameters: int,
        model_size: float,
    ) -> None:
        summary_table = _format_summary_table(total_parameters, trainable_parameters, model_size, *summary_data)
        log.info("\n" + summary_table)
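
The callback's ``on_fit_start`` is a thin wrapper around ``summarize`` from ``pytorch_lightning.utilities.model_summary``, so the same table can be produced outside a Trainer run; a small sketch, assuming ``model`` is any LightningModule instance (an illustrative name, not defined in this file)::

    from pytorch_lightning.utilities.model_summary import summarize

    # same helper the callback calls in on_fit_start
    summary = summarize(model, max_depth=2)  # model: an illustrative LightningModule
    print(summary)  # assumed to render the name/type/#params table described above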
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/prediction_writer.py
ADDED
@@ -0,0 +1,119 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
BasePredictionWriter
====================

Aids in saving predictions
"""
from typing import Any, Optional, Sequence

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.utilities import LightningEnum
from pytorch_lightning.utilities.exceptions import MisconfigurationException


class WriteInterval(LightningEnum):
    BATCH = "batch"
    EPOCH = "epoch"
    BATCH_AND_EPOCH = "batch_and_epoch"

    @property
    def on_batch(self) -> bool:
        return self in (self.BATCH, self.BATCH_AND_EPOCH)

    @property
    def on_epoch(self) -> bool:
        return self in (self.EPOCH, self.BATCH_AND_EPOCH)


class BasePredictionWriter(Callback):
    """Base class to implement how the predictions should be stored.

    Args:
        write_interval: When to write.

    Example::

        import torch
        from pytorch_lightning.callbacks import BasePredictionWriter

        class CustomWriter(BasePredictionWriter):

            def __init__(self, output_dir: str, write_interval: str):
                super().__init__(write_interval)
                self.output_dir = output_dir

            def write_on_batch_end(
                self, trainer, pl_module: 'LightningModule', prediction: Any, batch_indices: List[int], batch: Any,
                batch_idx: int, dataloader_idx: int
            ):
                torch.save(prediction, os.path.join(self.output_dir, dataloader_idx, f"{batch_idx}.pt"))

            def write_on_epoch_end(
                self, trainer, pl_module: 'LightningModule', predictions: List[Any], batch_indices: List[Any]
            ):
                torch.save(predictions, os.path.join(self.output_dir, "predictions.pt"))
    """

    def __init__(self, write_interval: str = "batch") -> None:
        if write_interval not in list(WriteInterval):
            raise MisconfigurationException(f"`write_interval` should be one of {[i.value for i in WriteInterval]}.")
        self.interval = WriteInterval(write_interval)

    def write_on_batch_end(
        self,
        trainer: "pl.Trainer",
        pl_module: "pl.LightningModule",
        prediction: Any,
        batch_indices: Optional[Sequence[int]],
        batch: Any,
        batch_idx: int,
        dataloader_idx: int,
    ) -> None:
        """Override with the logic to write a single batch."""
        raise NotImplementedError()

    def write_on_epoch_end(
        self,
        trainer: "pl.Trainer",
        pl_module: "pl.LightningModule",
        predictions: Sequence[Any],
        batch_indices: Optional[Sequence[Any]],
    ) -> None:
        """Override with the logic to write all batches."""
        raise NotImplementedError()

    def on_predict_batch_end(
        self,
        trainer: "pl.Trainer",
        pl_module: "pl.LightningModule",
        outputs: Any,
        batch: Any,
        batch_idx: int,
        dataloader_idx: int,
    ) -> None:
        if not self.interval.on_batch:
            return
        batch_indices = trainer.predict_loop.epoch_loop.current_batch_indices
        self.write_on_batch_end(trainer, pl_module, outputs, batch_indices, batch, batch_idx, dataloader_idx)

    def on_predict_epoch_end(
        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", outputs: Sequence[Any]
    ) -> None:
        if not self.interval.on_epoch:
            return
        epoch_batch_indices = trainer.predict_loop.epoch_batch_indices
        self.write_on_epoch_end(trainer, pl_module, trainer.predict_loop.predictions, epoch_batch_indices)
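
For completeness, a short sketch of how a concrete writer would be wired into prediction, assuming the ``CustomWriter`` class has been defined as in the docstring example above and that ``model`` and ``predict_dataloader`` are illustrative names for a LightningModule and a DataLoader::

    from pytorch_lightning import Trainer

    # "epoch" triggers write_on_epoch_end only; "batch" or "batch_and_epoch" also exist
    pred_writer = CustomWriter(output_dir="pred_output", write_interval="epoch")
    trainer = Trainer(callbacks=[pred_writer])
    # predictions are written by the callback, so they need not be returned in memory
    trainer.predict(model, dataloaders=predict_dataloader, return_predictions=False)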
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/pruning.py
ADDED
@@ -0,0 +1,486 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
ModelPruning
^^^^^^^^^^^^
"""
import inspect
import logging
from copy import deepcopy
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

import torch
import torch.nn.utils.prune as pytorch_prune
from torch import nn
from typing_extensions import TypedDict

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning.utilities.apply_func import apply_to_collection
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.rank_zero import rank_zero_debug, rank_zero_only

log = logging.getLogger(__name__)

_PYTORCH_PRUNING_FUNCTIONS = {
    "ln_structured": pytorch_prune.ln_structured,
    "l1_unstructured": pytorch_prune.l1_unstructured,
    "random_structured": pytorch_prune.random_structured,
    "random_unstructured": pytorch_prune.random_unstructured,
}

_PYTORCH_PRUNING_METHOD = {
    "ln_structured": pytorch_prune.LnStructured,
    "l1_unstructured": pytorch_prune.L1Unstructured,
    "random_structured": pytorch_prune.RandomStructured,
    "random_unstructured": pytorch_prune.RandomUnstructured,
}

_PARAM_TUPLE = Tuple[nn.Module, str]
_PARAM_LIST = Sequence[_PARAM_TUPLE]
_MODULE_CONTAINERS = (LightningModule, nn.Sequential, nn.ModuleList, nn.ModuleDict)


class _LayerRef(TypedDict):
    data: nn.Module
    names: List[Tuple[int, str]]


class ModelPruning(Callback):
    PARAMETER_NAMES = ("weight", "bias")

    def __init__(
        self,
        pruning_fn: Union[Callable, str],
        parameters_to_prune: _PARAM_LIST = (),
        parameter_names: Optional[List[str]] = None,
        use_global_unstructured: bool = True,
        amount: Union[int, float, Callable[[int], Union[int, float]]] = 0.5,
        apply_pruning: Union[bool, Callable[[int], bool]] = True,
        make_pruning_permanent: bool = True,
        use_lottery_ticket_hypothesis: Union[bool, Callable[[int], bool]] = True,
        resample_parameters: bool = False,
        pruning_dim: Optional[int] = None,
        pruning_norm: Optional[int] = None,
        verbose: int = 0,
        prune_on_train_epoch_end: bool = True,
    ) -> None:
        """Model pruning Callback, using PyTorch's prune utilities. This callback is responsible of pruning
        networks parameters during training.

        To learn more about pruning with PyTorch, please take a look at
        `this tutorial <https://pytorch.org/tutorials/intermediate/pruning_tutorial.html>`_.

        .. warning:: ``ModelPruning`` is in beta and subject to change.

        .. code-block:: python

            parameters_to_prune = [(model.mlp_1, "weight"), (model.mlp_2, "weight")]

            trainer = Trainer(
                callbacks=[
                    ModelPruning(
                        pruning_fn="l1_unstructured",
                        parameters_to_prune=parameters_to_prune,
                        amount=0.01,
                        use_global_unstructured=True,
                    )
                ]
            )

        When ``parameters_to_prune`` is ``None``, ``parameters_to_prune`` will contain all parameters from the model.
        The user can override ``filter_parameters_to_prune`` to filter any ``nn.Module`` to be pruned.

        Args:

            pruning_fn: Function from torch.nn.utils.prune module or your own PyTorch ``BasePruningMethod`` subclass.
                Can also be string e.g. `"l1_unstructured"`. See pytorch docs for more details.

            parameters_to_prune: List of tuples ``(nn.Module, "parameter_name_string")``.

            parameter_names: List of parameter names to be pruned from the nn.Module.
                Can either be ``"weight"`` or ``"bias"``.

            use_global_unstructured: Whether to apply pruning globally on the model.
                If ``parameters_to_prune`` is provided, global unstructured will be restricted on them.

            amount: Quantity of parameters to prune:

                - ``float``. Between 0.0 and 1.0. Represents the fraction of parameters to prune.
                - ``int``. Represents the absolute number of parameters to prune.
                - ``Callable``. For dynamic values. Will be called every epoch. Should return a value.

            apply_pruning: Whether to apply pruning.

                - ``bool``. Always apply it or not.
                - ``Callable[[epoch], bool]``. For dynamic values. Will be called every epoch.

            make_pruning_permanent: Whether to remove all reparametrization pre-hooks and apply masks
                when training ends or the model is saved.

            use_lottery_ticket_hypothesis: See `The lottery ticket hypothesis <https://arxiv.org/abs/1803.03635>`_:

                - ``bool``. Whether to apply it or not.
                - ``Callable[[epoch], bool]``. For dynamic values. Will be called every epoch.

            resample_parameters: Used with ``use_lottery_ticket_hypothesis``. If True, the model parameters will
                be resampled, otherwise, the exact original parameters will be used.

            pruning_dim: If you are using a structured pruning method you need to specify the dimension.

            pruning_norm: If you are using ``ln_structured`` you need to specify the norm.

            verbose: Verbosity level. 0 to disable, 1 to log overall sparsity, 2 to log per-layer sparsity

            prune_on_train_epoch_end: whether to apply pruning at the end of the training epoch.
                If this is ``False``, then the check runs at the end of the validation epoch.

        Raises:
            MisconfigurationException:
                If ``parameter_names`` is neither ``"weight"`` nor ``"bias"``,
                if the provided ``pruning_fn`` is not supported,
                if ``pruning_dim`` is not provided when ``"unstructured"``,
                if ``pruning_norm`` is not provided when ``"ln_structured"``,
                if ``pruning_fn`` is neither ``str`` nor :class:`torch.nn.utils.prune.BasePruningMethod`, or
                if ``amount`` is none of ``int``, ``float`` and ``Callable``.
        """

        self._use_global_unstructured = use_global_unstructured
        self._parameters_to_prune = parameters_to_prune
        self._use_lottery_ticket_hypothesis = use_lottery_ticket_hypothesis
        self._resample_parameters = resample_parameters
        self._prune_on_train_epoch_end = prune_on_train_epoch_end
        self._parameter_names = parameter_names or self.PARAMETER_NAMES
        self._global_kwargs: Dict[str, Any] = {}
        self._original_layers: Optional[Dict[int, _LayerRef]] = None
        self._pruning_method_name: Optional[str] = None

        for name in self._parameter_names:
            if name not in self.PARAMETER_NAMES:
                raise MisconfigurationException(
                    f"The provided `parameter_names` name: {name} isn't in {self.PARAMETER_NAMES}"
                )

        if isinstance(pruning_fn, str):
            pruning_kwargs = {}
            pruning_fn = pruning_fn.lower()
            if pruning_fn not in _PYTORCH_PRUNING_FUNCTIONS:
                raise MisconfigurationException(
                    f"The provided `pruning_fn` {pruning_fn} isn't available in PyTorch's"
                    f" built-in functions: {list(_PYTORCH_PRUNING_FUNCTIONS.keys())} "
                )
            if pruning_fn.endswith("_structured"):
                if pruning_dim is None:
                    raise MisconfigurationException(
                        "When requesting `structured` pruning, the `pruning_dim` should be provided."
                    )
                if pruning_fn == "ln_structured":
                    if pruning_norm is None:
                        raise MisconfigurationException(
                            "When requesting `ln_structured` pruning, the `pruning_norm` should be provided."
                        )
                    pruning_kwargs["n"] = pruning_norm
                pruning_kwargs["dim"] = pruning_dim
            pruning_fn = self._create_pruning_fn(pruning_fn, **pruning_kwargs)
        elif self._is_pruning_method(pruning_fn):
            if not use_global_unstructured:
                raise MisconfigurationException(
                    "PyTorch `BasePruningMethod` is currently only supported with `use_global_unstructured=True`."
|
| 202 |
+
)
|
| 203 |
+
else:
|
| 204 |
+
raise MisconfigurationException(
|
| 205 |
+
f"`pruning_fn` is expected to be a str in {list(_PYTORCH_PRUNING_FUNCTIONS.keys())}"
|
| 206 |
+
f" or a PyTorch `BasePruningMethod`. Found: {pruning_fn}."
|
| 207 |
+
" HINT: if passing a `BasePruningMethod`, pass the the class, not an instance"
|
| 208 |
+
)
|
| 209 |
+
|
| 210 |
+
# need to ignore typing here since pytorch base class does not define the PRUNING_TYPE attribute
|
| 211 |
+
if use_global_unstructured and pruning_fn.PRUNING_TYPE != "unstructured": # type: ignore
|
| 212 |
+
raise MisconfigurationException(
|
| 213 |
+
'Only the "unstructured" PRUNING_TYPE is supported with `use_global_unstructured=True`.' # type: ignore
|
| 214 |
+
f" Found method {pruning_fn} of type {pruning_fn.PRUNING_TYPE}. "
|
| 215 |
+
)
|
| 216 |
+
|
| 217 |
+
self.pruning_fn = pruning_fn
|
| 218 |
+
self._apply_pruning = apply_pruning
|
| 219 |
+
self._make_pruning_permanent = make_pruning_permanent
|
| 220 |
+
|
| 221 |
+
if not (isinstance(amount, (int, float)) or callable(amount)):
|
| 222 |
+
raise MisconfigurationException(
|
| 223 |
+
"`amount` should be provided and be either an int, a float or Callable function."
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
self.amount = amount
|
| 227 |
+
|
| 228 |
+
if verbose not in (0, 1, 2):
|
| 229 |
+
raise MisconfigurationException("`verbose` must be any of (0, 1, 2)")
|
| 230 |
+
|
| 231 |
+
self._verbose = verbose
|
| 232 |
+
|
| 233 |
+
def filter_parameters_to_prune(self, parameters_to_prune: _PARAM_LIST = ()) -> _PARAM_LIST:
|
| 234 |
+
"""This function can be overridden to control which module to prune."""
|
| 235 |
+
return parameters_to_prune
|
| 236 |
+
|
| 237 |
+
def _create_pruning_fn(self, pruning_fn: str, **kwargs: Any) -> Union[Callable, pytorch_prune.BasePruningMethod]:
|
| 238 |
+
"""This function takes `pruning_fn`, a function name.
|
| 239 |
+
|
| 240 |
+
IF use_global_unstructured, pruning_fn will be resolved into its associated ``PyTorch BasePruningMethod`` ELSE,
|
| 241 |
+
pruning_fn will be resolved into its function counterpart from `torch.nn.utils.prune`.
|
| 242 |
+
"""
|
| 243 |
+
pruning_meth = (
|
| 244 |
+
_PYTORCH_PRUNING_METHOD[pruning_fn]
|
| 245 |
+
if self._use_global_unstructured
|
| 246 |
+
else _PYTORCH_PRUNING_FUNCTIONS[pruning_fn]
|
| 247 |
+
)
|
| 248 |
+
assert callable(pruning_meth), "Selected pruning method is not callable"
|
| 249 |
+
if self._use_global_unstructured:
|
| 250 |
+
self._global_kwargs = kwargs
|
| 251 |
+
# save the function __name__ now because partial does not include it
|
| 252 |
+
# and there are issues setting the attribute manually in ddp.
|
| 253 |
+
self._pruning_method_name = pruning_meth.__name__
|
| 254 |
+
if self._use_global_unstructured:
|
| 255 |
+
return pruning_meth
|
| 256 |
+
return ModelPruning._wrap_pruning_fn(pruning_meth, **kwargs)
|
| 257 |
+
|
| 258 |
+
@staticmethod
|
| 259 |
+
def _wrap_pruning_fn(pruning_fn: Callable, **kwargs: Any) -> Callable:
|
| 260 |
+
return partial(pruning_fn, **kwargs)
|
| 261 |
+
|
| 262 |
+
def make_pruning_permanent(self, module: nn.Module) -> None:
|
| 263 |
+
"""Removes pruning buffers from any pruned modules.
|
| 264 |
+
|
| 265 |
+
Adapted from https://github.com/pytorch/pytorch/blob/1.7.1/torch/nn/utils/prune.py#L1176-L1180
|
| 266 |
+
"""
|
| 267 |
+
for _, module in module.named_modules():
|
| 268 |
+
for k in list(module._forward_pre_hooks):
|
| 269 |
+
hook = module._forward_pre_hooks[k]
|
| 270 |
+
if isinstance(hook, pytorch_prune.BasePruningMethod):
|
| 271 |
+
hook.remove(module)
|
| 272 |
+
del module._forward_pre_hooks[k]
|
| 273 |
+
|
| 274 |
+
@staticmethod
|
| 275 |
+
def _copy_param(new: nn.Module, old: nn.Module, name: str) -> None:
|
| 276 |
+
dst = getattr(new, name)
|
| 277 |
+
src = getattr(old, name)
|
| 278 |
+
if dst is None or src is None or not isinstance(dst, torch.Tensor) or not isinstance(src, torch.Tensor):
|
| 279 |
+
return
|
| 280 |
+
dst.data = src.data.to(dst.device)
|
| 281 |
+
|
| 282 |
+
def apply_lottery_ticket_hypothesis(self) -> None:
|
| 283 |
+
r"""
|
| 284 |
+
Lottery ticket hypothesis algorithm (see page 2 of the paper):
|
| 285 |
+
|
| 286 |
+
1. Randomly initialize a neural network :math:`f(x; \theta_0)` (where :math:`\theta_0 \sim \mathcal{D}_\theta`).
|
| 287 |
+
2. Train the network for :math:`j` iterations, arriving at parameters :math:`\theta_j`.
|
| 288 |
+
3. Prune :math:`p\%` of the parameters in :math:`\theta_j`, creating a mask :math:`m`.
|
| 289 |
+
4. Reset the remaining parameters to their values in :math:`\theta_0`, creating the winning ticket :math:`f(x; m \odot \theta_0)`.
|
| 290 |
+
|
| 291 |
+
This function implements the step 4.
|
| 292 |
+
|
| 293 |
+
The ``resample_parameters`` argument can be used to reset the parameters with a new :math:`\theta_z \sim \mathcal{D}_\theta`
|
| 294 |
+
""" # noqa: E501
|
| 295 |
+
assert self._original_layers is not None
|
| 296 |
+
for d in self._original_layers.values():
|
| 297 |
+
copy = d["data"]
|
| 298 |
+
names = d["names"]
|
| 299 |
+
if self._resample_parameters and hasattr(copy, "reset_parameters") and callable(copy.reset_parameters):
|
| 300 |
+
copy = deepcopy(copy) # keep the original parameters
|
| 301 |
+
copy.reset_parameters()
|
| 302 |
+
for i, name in names:
|
| 303 |
+
new, new_name = self._parameters_to_prune[i]
|
| 304 |
+
self._copy_param(new, copy, name)
|
| 305 |
+
|
| 306 |
+
def _apply_local_pruning(self, amount: float) -> None:
|
| 307 |
+
for module, name in self._parameters_to_prune:
|
| 308 |
+
self.pruning_fn(module, name=name, amount=amount)
|
| 309 |
+
|
| 310 |
+
def _resolve_global_kwargs(self, amount: float) -> Dict[str, Any]:
|
| 311 |
+
self._global_kwargs["amount"] = amount
|
| 312 |
+
params = set(inspect.signature(self.pruning_fn).parameters)
|
| 313 |
+
params.discard("self")
|
| 314 |
+
return {k: v for k, v in self._global_kwargs.items() if k in params}
|
| 315 |
+
|
| 316 |
+
def _apply_global_pruning(self, amount: float) -> None:
|
| 317 |
+
pytorch_prune.global_unstructured(
|
| 318 |
+
self._parameters_to_prune, pruning_method=self.pruning_fn, **self._resolve_global_kwargs(amount)
|
| 319 |
+
)
|
| 320 |
+
|
| 321 |
+
@staticmethod
|
| 322 |
+
def _get_pruned_stats(module: nn.Module, name: str) -> Tuple[int, int]:
|
| 323 |
+
attr = f"{name}_mask"
|
| 324 |
+
if not hasattr(module, attr):
|
| 325 |
+
return 0, 1
|
| 326 |
+
mask = getattr(module, attr)
|
| 327 |
+
return (mask == 0).sum().item(), mask.numel()
|
| 328 |
+
|
| 329 |
+
def apply_pruning(self, amount: Union[int, float]) -> None:
|
| 330 |
+
"""Applies pruning to ``parameters_to_prune``."""
|
| 331 |
+
if self._verbose:
|
| 332 |
+
prev_stats = [self._get_pruned_stats(m, n) for m, n in self._parameters_to_prune]
|
| 333 |
+
|
| 334 |
+
if self._use_global_unstructured:
|
| 335 |
+
self._apply_global_pruning(amount)
|
| 336 |
+
else:
|
| 337 |
+
self._apply_local_pruning(amount)
|
| 338 |
+
|
| 339 |
+
if self._verbose:
|
| 340 |
+
curr_stats = [self._get_pruned_stats(m, n) for m, n in self._parameters_to_prune]
|
| 341 |
+
self._log_sparsity_stats(prev_stats, curr_stats, amount=amount)
|
| 342 |
+
|
| 343 |
+
@rank_zero_only
|
| 344 |
+
def _log_sparsity_stats(
|
| 345 |
+
self, prev: List[Tuple[int, int]], curr: List[Tuple[int, int]], amount: Union[int, float] = 0
|
| 346 |
+
) -> None:
|
| 347 |
+
total_params = sum(p.numel() for layer, _ in self._parameters_to_prune for p in layer.parameters())
|
| 348 |
+
prev_total_zeros = sum(zeros for zeros, _ in prev)
|
| 349 |
+
curr_total_zeros = sum(zeros for zeros, _ in curr)
|
| 350 |
+
log.info(
|
| 351 |
+
f"Applied `{self._pruning_method_name}`. Pruned:"
|
| 352 |
+
f" {prev_total_zeros}/{total_params} ({prev_total_zeros / total_params:.2%}) ->"
|
| 353 |
+
f" {curr_total_zeros}/{total_params} ({curr_total_zeros / total_params:.2%})"
|
| 354 |
+
)
|
| 355 |
+
if self._verbose == 2:
|
| 356 |
+
for i, (module, name) in enumerate(self._parameters_to_prune):
|
| 357 |
+
prev_mask_zeros, prev_mask_size = prev[i]
|
| 358 |
+
curr_mask_zeros, curr_mask_size = curr[i]
|
| 359 |
+
log.info(
|
| 360 |
+
f"Applied `{self._pruning_method_name}` to `{module!r}.{name}` with amount={amount}. Pruned:"
|
| 361 |
+
f" {prev_mask_zeros} ({prev_mask_zeros / prev_mask_size:.2%}) ->"
|
| 362 |
+
f" {curr_mask_zeros} ({curr_mask_zeros / curr_mask_size:.2%})"
|
| 363 |
+
)
|
| 364 |
+
|
| 365 |
+
def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
|
| 366 |
+
parameters_to_prune = self.sanitize_parameters_to_prune(
|
| 367 |
+
pl_module, self._parameters_to_prune, parameter_names=self._parameter_names
|
| 368 |
+
)
|
| 369 |
+
|
| 370 |
+
self._parameters_to_prune = self.filter_parameters_to_prune(parameters_to_prune)
|
| 371 |
+
|
| 372 |
+
if self._use_lottery_ticket_hypothesis:
|
| 373 |
+
# group modules by id. Each entry has a copy of the initial data
|
| 374 |
+
# and a list of the associated parameter names to prune
|
| 375 |
+
self._original_layers = {}
|
| 376 |
+
for i, (module, name) in enumerate(self._parameters_to_prune):
|
| 377 |
+
id_ = id(module)
|
| 378 |
+
self._original_layers.setdefault(id_, _LayerRef(data=deepcopy(module), names=[]))
|
| 379 |
+
self._original_layers[id_]["names"].append((i, name))
|
| 380 |
+
|
| 381 |
+
def _run_pruning(self, current_epoch: int) -> None:
|
| 382 |
+
prune = self._apply_pruning(current_epoch) if callable(self._apply_pruning) else self._apply_pruning
|
| 383 |
+
amount = self.amount(current_epoch) if callable(self.amount) else self.amount
|
| 384 |
+
if not prune or not amount:
|
| 385 |
+
return
|
| 386 |
+
self.apply_pruning(amount)
|
| 387 |
+
|
| 388 |
+
if (
|
| 389 |
+
self._use_lottery_ticket_hypothesis(current_epoch)
|
| 390 |
+
if callable(self._use_lottery_ticket_hypothesis)
|
| 391 |
+
else self._use_lottery_ticket_hypothesis
|
| 392 |
+
):
|
| 393 |
+
self.apply_lottery_ticket_hypothesis()
|
| 394 |
+
|
| 395 |
+
def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: LightningModule) -> None:
|
| 396 |
+
if self._prune_on_train_epoch_end:
|
| 397 |
+
rank_zero_debug("`ModelPruning.on_train_epoch_end`. Applying pruning")
|
| 398 |
+
self._run_pruning(pl_module.current_epoch)
|
| 399 |
+
|
| 400 |
+
def on_validation_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 401 |
+
if not trainer.sanity_checking and not self._prune_on_train_epoch_end:
|
| 402 |
+
rank_zero_debug("`ModelPruning.on_validation_epoch_end`. Applying pruning")
|
| 403 |
+
self._run_pruning(pl_module.current_epoch)
|
| 404 |
+
|
| 405 |
+
def on_train_end(self, trainer: "pl.Trainer", pl_module: LightningModule) -> None:
|
| 406 |
+
if self._make_pruning_permanent:
|
| 407 |
+
rank_zero_debug("`ModelPruning.on_train_end`. Pruning is made permanent for this checkpoint")
|
| 408 |
+
self.make_pruning_permanent(pl_module)
|
| 409 |
+
|
| 410 |
+
def _make_pruning_permanent_on_state_dict(self, pl_module: LightningModule) -> Dict[str, Any]:
|
| 411 |
+
state_dict = pl_module.state_dict()
|
| 412 |
+
|
| 413 |
+
# find the mask and the original weights.
|
| 414 |
+
map_pruned_params = {k.replace("_mask", "") for k in state_dict.keys() if k.endswith("_mask")}
|
| 415 |
+
for tensor_name in map_pruned_params:
|
| 416 |
+
orig = state_dict.pop(tensor_name + "_orig")
|
| 417 |
+
mask = state_dict.pop(tensor_name + "_mask")
|
| 418 |
+
# make weights permanent
|
| 419 |
+
state_dict[tensor_name] = mask.to(dtype=orig.dtype) * orig
|
| 420 |
+
|
| 421 |
+
def move_to_cpu(tensor: torch.Tensor) -> torch.Tensor:
|
| 422 |
+
# each tensor and move them on cpu
|
| 423 |
+
return tensor.cpu()
|
| 424 |
+
|
| 425 |
+
return apply_to_collection(state_dict, torch.Tensor, move_to_cpu)
|
| 426 |
+
|
| 427 |
+
def on_save_checkpoint(
|
| 428 |
+
self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", checkpoint: Dict[str, Any]
|
| 429 |
+
) -> Optional[dict]:
|
| 430 |
+
if self._make_pruning_permanent:
|
| 431 |
+
rank_zero_debug("`ModelPruning.on_save_checkpoint`. Pruning is made permanent for this checkpoint")
|
| 432 |
+
# manually prune the weights so training can keep going with the same buffers
|
| 433 |
+
checkpoint["state_dict"] = self._make_pruning_permanent_on_state_dict(pl_module)
|
| 434 |
+
|
| 435 |
+
@staticmethod
|
| 436 |
+
def sanitize_parameters_to_prune(
|
| 437 |
+
pl_module: LightningModule, parameters_to_prune: _PARAM_LIST = (), parameter_names: Sequence[str] = ()
|
| 438 |
+
) -> _PARAM_LIST:
|
| 439 |
+
"""This function is responsible of sanitizing ``parameters_to_prune`` and ``parameter_names``. If
|
| 440 |
+
``parameters_to_prune is None``, it will be generated with all parameters of the model.
|
| 441 |
+
|
| 442 |
+
Raises:
|
| 443 |
+
MisconfigurationException:
|
| 444 |
+
If ``parameters_to_prune`` doesn't exist in the model, or
|
| 445 |
+
if ``parameters_to_prune`` is neither a list nor a tuple.
|
| 446 |
+
"""
|
| 447 |
+
parameters = parameter_names or ModelPruning.PARAMETER_NAMES
|
| 448 |
+
|
| 449 |
+
current_modules = [m for m in pl_module.modules() if not isinstance(m, _MODULE_CONTAINERS)]
|
| 450 |
+
|
| 451 |
+
if not parameters_to_prune:
|
| 452 |
+
parameters_to_prune = [
|
| 453 |
+
(m, p) for p in parameters for m in current_modules if getattr(m, p, None) is not None
|
| 454 |
+
]
|
| 455 |
+
elif (
|
| 456 |
+
isinstance(parameters_to_prune, (list, tuple))
|
| 457 |
+
and len(parameters_to_prune) > 0
|
| 458 |
+
and all(len(p) == 2 for p in parameters_to_prune)
|
| 459 |
+
and all(isinstance(a, nn.Module) and isinstance(b, str) for a, b in parameters_to_prune)
|
| 460 |
+
):
|
| 461 |
+
missing_modules, missing_parameters = [], []
|
| 462 |
+
for module, name in parameters_to_prune:
|
| 463 |
+
if module not in current_modules:
|
| 464 |
+
missing_modules.append(module)
|
| 465 |
+
continue
|
| 466 |
+
if not hasattr(module, name):
|
| 467 |
+
missing_parameters.append(name)
|
| 468 |
+
|
| 469 |
+
if missing_modules or missing_parameters:
|
| 470 |
+
raise MisconfigurationException(
|
| 471 |
+
"Some provided `parameters_to_tune` don't exist in the model."
|
| 472 |
+
f" Found missing modules: {missing_modules} and missing parameters: {missing_parameters}"
|
| 473 |
+
)
|
| 474 |
+
else:
|
| 475 |
+
raise MisconfigurationException(
|
| 476 |
+
"The provided `parameters_to_prune` should either be list of tuple"
|
| 477 |
+
" with 2 elements: (nn.Module, parameter_name_to_prune) or None"
|
| 478 |
+
)
|
| 479 |
+
|
| 480 |
+
return parameters_to_prune
|
| 481 |
+
|
| 482 |
+
@staticmethod
|
| 483 |
+
def _is_pruning_method(method: Any) -> bool:
|
| 484 |
+
if not inspect.isclass(method):
|
| 485 |
+
return False
|
| 486 |
+
return issubclass(method, pytorch_prune.BasePruningMethod)
|
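
As a quick illustration of the callable ``amount`` scheduling described in the ``ModelPruning`` docstring above, here is a minimal usage sketch; the schedule values and the ``LitModel`` module are assumptions for illustration, not part of the file:

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import ModelPruning


    def prune_amount(epoch: int) -> float:
        # hypothetical schedule: skip the warm-up epochs, then prune a bit every epoch
        if epoch < 5:
            return 0.0  # a falsy amount makes the callback skip pruning for this epoch
        return 0.1 if epoch < 20 else 0.05


    trainer = Trainer(
        max_epochs=30,
        callbacks=[
            ModelPruning(
                pruning_fn="l1_unstructured",  # resolved to torch.nn.utils.prune
                amount=prune_amount,           # called every epoch with the current epoch index
                use_global_unstructured=True,
                make_pruning_permanent=True,   # masks are folded into saved checkpoints
            )
        ],
    )
    # trainer.fit(LitModel())  # LitModel is a user-defined LightningModule (assumed)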
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/quantization.py
ADDED
@@ -0,0 +1,344 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
Quantization
^^^^^^^^^^^^

"""
import copy
import functools
from typing import Any, Callable, Dict, Optional, Sequence, Union

import torch
from torch import Tensor
from torch.quantization import FakeQuantizeBase

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_10, _TORCH_GREATER_EQUAL_1_11
from pytorch_lightning.utilities.exceptions import MisconfigurationException

if _TORCH_GREATER_EQUAL_1_10:
    from torch.ao.quantization.qconfig import QConfig
else:
    from torch.quantization import QConfig

if _TORCH_GREATER_EQUAL_1_11:
    from torch.ao.quantization import fuse_modules_qat as fuse_modules
else:
    from torch.quantization import fuse_modules


def wrap_qat_forward_context(
    quant_cb, model: "pl.LightningModule", func: Callable, trigger_condition: Optional[Union[Callable, int]] = None
) -> Callable:
    """Decorator to wrap forward path as it is needed to quantize inputs and dequantize outputs for in/out
    compatibility Moreover this version has the (de)quantization conditional as it may not be needed for the
    training all the time."""
    # todo: consider using registering hook before/after forward
    @functools.wraps(func)
    def wrapper(data) -> Any:
        _is_func_true = isinstance(trigger_condition, Callable) and trigger_condition(model.trainer)
        _is_count_true = isinstance(trigger_condition, int) and quant_cb._forward_calls < trigger_condition
        _quant_run = trigger_condition is None or _is_func_true or _is_count_true
        # apply custom trigger
        if _quant_run:
            quant_cb._forward_calls += 1
            data = model.quant(data)
        data = func(data)
        # apply custom trigger
        if _quant_run:
            data = model.dequant(data)
        return data

    return wrapper


def wrap_quantize_forward_context(model: "pl.LightningModule", func: Callable) -> Callable:
    """Decorator to wrap forward path as it is needed to quantize inputs and dequantize outputs for in/out
    compatibility."""
    # todo: consider using registering hook before/after forward
    @functools.wraps(func)
    def wrapper(data) -> Any:
        data = model.quant(data)
        data = func(data)
        data = model.dequant(data)
        return data

    return wrapper


def _recursive_hasattr(obj: Any, attribs: str, state: bool = True) -> bool:
    """recursive check if model has some layers denoted with '.'."""
    if "." in attribs:
        attrib, attribs = attribs.split(".", 1)
        if hasattr(obj, attrib):
            return _recursive_hasattr(getattr(obj, attrib), attribs, state)
        return False
    return state and hasattr(obj, attribs)


class QuantizationAwareTraining(Callback):
    """Quantization allows speeding up inference and decreasing memory requirements by performing computations and
    storing tensors at lower bitwidths (such as INT8 or FLOAT16) than floating point precision. We use native
    PyTorch API so for more information see `PyTorch Quantization`_.

    .. warning:: ``QuantizationAwareTraining`` is in beta and subject to change.

    The ``LightningModule`` is prepared for QAT training in the ``on_fit_start`` hook. Checkpoints saved during training
    include already collected stats to perform the Quantization conversion, but it doesn't contain the quantized or
    fused model/layers. The quantization is performed in the ``on_fit_end`` hook so the model needs to be saved after
    training finishes if quantization is desired.

    Args:

        qconfig: quantization configuration:

            - 'fbgemm' for server inference.
            - 'qnnpack' for mobile inference.
            - a custom `torch.quantization.QConfig`_.

        observer_type: allows switching between ``MovingAverageMinMaxObserver`` as "average" (default)
            and ``HistogramObserver`` as "histogram" which is more computationally expensive.

        collect_quantization: count or custom function to collect quantization statistics:

            - ``None`` (default). The quantization observer is called in each module forward
              (useful for collecting extended statistic when using image/data augmentation).
            - ``int``. Use to set a fixed number of calls, starting from the beginning.
            - ``Callable``. Custom function with single trainer argument.
              See this example to trigger only the last epoch:

              .. code-block:: python

                  def custom_trigger_last(trainer):
                      return trainer.current_epoch == (trainer.max_epochs - 1)


                  QuantizationAwareTraining(collect_quantization=custom_trigger_last)

        modules_to_fuse: allows you fuse a few layers together as shown in
            `diagram <https://pytorch.org/docs/stable/quantization.html#quantization-aware-training>`_
            to find which layer types can be fused, check https://github.com/pytorch/pytorch/pull/43286.

        input_compatible: preserve quant/dequant layers. This allows to feat any input as to the original model,
            but break compatibility to torchscript and export with ``torch.save``.

        quantize_on_fit_end: perform the quantization in `on_fit_end`.
            Note that once converted, the model cannot be put in training mode again.

        observer_enabled_stages: allow fake-quantization modules' observers to do calibration during provided stages:

            - ``'train'``: the observers can do calibration during training.
            - ``'validate'``: the observers can do calibration during validating.
              Note that we don't disable observers during the sanity check as the model hasn't been calibrated with
              training data yet. After the sanity check, the fake-quantization modules are restored to initial states.
            - ``'test'``: the observers can do calibration during testing.
            - ``'predict'``: the observers can do calibration during predicting.

        Note that we only handle observers belonging to fake-quantization modules. When ``qconfig`` is a ``str`` and
        ``observer_type`` is ``'histogram'``, the observers won't belong to any fake-quantization modules and will
        not be controlled by the callback.

    .. _PyTorch Quantization: https://pytorch.org/docs/stable/quantization.html#quantization-aware-training
    .. _torch.quantization.QConfig: https://pytorch.org/docs/stable/torch.quantization.html#torch.quantization.QConfig
    """

    OBSERVER_TYPES = ("histogram", "average")
    OBSERVER_STAGES = ("train", "validate", "test", "predict")

    def __init__(
        self,
        qconfig: Union[str, QConfig] = "fbgemm",
        observer_type: str = "average",
        collect_quantization: Optional[Union[int, Callable]] = None,
        modules_to_fuse: Optional[Sequence] = None,
        input_compatible: bool = True,
        quantize_on_fit_end: bool = True,
        observer_enabled_stages: Sequence[str] = ("train",),
    ) -> None:
        _valid_qconf_str = isinstance(qconfig, str) and qconfig in torch.backends.quantized.supported_engines
        if not isinstance(qconfig, QConfig) and not _valid_qconf_str:
            raise MisconfigurationException(
                f"Unsupported qconfig: f{qconfig}.\nTry one of defaults: {torch.backends.quantized.supported_engines}"
            )
        self._qconfig = qconfig

        if observer_type not in self.OBSERVER_TYPES:
            raise MisconfigurationException(
                f'Unsupported observer type "{observer_type}", allowed are {self.OBSERVER_TYPES}.'
            )
        self._observer_type = observer_type

        if collect_quantization is not None and not isinstance(collect_quantization, (int, Callable)):
            raise MisconfigurationException(
                f'Unsupported `collect_quantization` "{collect_quantization}", allowed are `int` or `Callable`.'
            )
        self._collect_quantization = collect_quantization

        self._modules_to_fuse = modules_to_fuse
        self._input_compatible = input_compatible
        self._convert_on_fit_end = quantize_on_fit_end

        observer_enabled_stages = set(observer_enabled_stages)
        unsupported_stages = observer_enabled_stages - set(self.OBSERVER_STAGES)
        if unsupported_stages:
            raise MisconfigurationException(
                f'Unsupported stages "{tuple(sorted(unsupported_stages))}", allowed are {self.OBSERVER_STAGES}.'
            )
        self._observer_disabled_stages = set(self.OBSERVER_STAGES) - observer_enabled_stages

        self._forward_calls = 0
        self._fake_quant_to_initial_state_dict = {}
        self._last_fake_quant_to_observer_enabled = {}
        self._module_prepared = False

    def _check_feasible_fuse(self, model: "pl.LightningModule") -> bool:
        if not self._modules_to_fuse:
            return False
        for group in self._modules_to_fuse:
            if not all(_recursive_hasattr(model, m) for m in group):
                raise MisconfigurationException(
                    f"You have requested to fuse {group} but one or more of them is not your model attributes"
                )
        return True

    def _collect_observer_enabled(self) -> Dict[FakeQuantizeBase, Tensor]:
        return {
            fake_quant: fake_quant.observer_enabled.clone() for fake_quant in self._fake_quant_to_initial_state_dict
        }

    def _disable_observer(self, pl_module: "pl.LightningModule") -> None:
        self._last_fake_quant_to_observer_enabled = self._collect_observer_enabled()
        pl_module.apply(torch.quantization.disable_observer)

    def _restore_last_observer_enabled(self) -> None:
        for fake_quant, observer_enabled in self._last_fake_quant_to_observer_enabled.items():
            fake_quant.observer_enabled.copy_(observer_enabled)

    def _prepare_model(self, model: torch.nn.Module) -> None:
        if self._module_prepared:
            return
        # QuantStub converts tensors from floating point to quantized
        model.quant = torch.quantization.QuantStub()
        # DeQuantStub converts tensors from quantized to floating point
        model.dequant = torch.quantization.DeQuantStub()
        # manually specify where tensors will be converted from quantized
        # to floating point in the quantized model
        self.__module_forward = model.forward
        model.forward = wrap_qat_forward_context(
            quant_cb=self, model=model, func=model.forward, trigger_condition=self._collect_quantization
        )

        # attach a global qconfig, which contains information about what kind
        # of observers to attach. Use 'fbgemm' for server inference
        if isinstance(self._qconfig, str):
            if self._observer_type == "histogram":
                model.qconfig = torch.quantization.get_default_qconfig(self._qconfig)
            elif self._observer_type == "average":
                # version=None corresponds to using FakeQuantize rather than
                # FusedMovingAvgObsFakeQuantize which was introduced in PT1.10
                # details in https://github.com/pytorch/pytorch/issues/64564
                extra_kwargs = dict(version=None) if _TORCH_GREATER_EQUAL_1_10 else {}
                model.qconfig = torch.quantization.get_default_qat_qconfig(self._qconfig, **extra_kwargs)

        elif isinstance(self._qconfig, QConfig):
            model.qconfig = self._qconfig

        if self._check_feasible_fuse(model):
            fuse_modules(model, self._modules_to_fuse, inplace=True)

        # Prepare the model for QAT. This inserts observers and fake_quants in
        # the model that will observe weight and activation tensors during calibration.
        torch.quantization.prepare_qat(model, inplace=True)

        fake_quants = tuple(module for module in model.modules() if isinstance(module, FakeQuantizeBase))
        self._fake_quant_to_initial_state_dict = {
            fake_quant: copy.deepcopy(fake_quant.state_dict()) for fake_quant in fake_quants
        }
        self._module_prepared = True

    def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"):
        self._prepare_model(pl_module)

    def on_fit_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if not self._convert_on_fit_end:
            pl_module.forward = self.__module_forward
            return
        pl_module.eval()
        # Convert the observed model to a quantized model. This does several things:
        # quantizes the weights, computes and stores the scale and bias value to be
        # used with each activation tensor, fuses modules where appropriate,
        # and replaces key operators with quantized implementations.
        torch.quantization.convert(pl_module, inplace=True)
        # check we shall preserve wrapper
        if self._input_compatible:
            pl_module.forward = wrap_quantize_forward_context(model=pl_module, func=self.__module_forward)
        else:
            pl_module.forward = self.__module_forward

    def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if "train" in self._observer_disabled_stages:
            self._disable_observer(pl_module)

    def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if "train" in self._observer_disabled_stages:
            self._restore_last_observer_enabled()

    def on_validation_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if "validate" in self._observer_disabled_stages and not trainer.sanity_checking:
            # ``torch.quantization.MovingAveragePerChannelMinMaxObserver`` and ``torch.quantization.HistogramObserver``
            # need to see at least one batch to infer the shapes of quantization ``scale`` and ``zero_point``. So we
            # don't disable observers during the sanity check so that they can infer the shapes of quantization
            # parameters with validation data.
            self._disable_observer(pl_module)

    def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if "validate" in self._observer_disabled_stages:
            if trainer.sanity_checking:
                for fake_quant, state_dict in self._fake_quant_to_initial_state_dict.items():
                    fake_quant.load_state_dict(state_dict)
            else:
                self._restore_last_observer_enabled()

    def on_test_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if "test" in self._observer_disabled_stages:
            self._disable_observer(pl_module)

    def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if "test" in self._observer_disabled_stages:
            self._restore_last_observer_enabled()

    def on_predict_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if "predict" in self._observer_disabled_stages:
            self._disable_observer(pl_module)

    def on_predict_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        if "predict" in self._observer_disabled_stages:
            self._restore_last_observer_enabled()

    def state_dict(self) -> Dict[str, Any]:
        keys = {"_qconfig", "_observer_type", "_collect_quantization", "_modules_to_fuse", "_input_compatible"}
        return {n: getattr(self, n) for n in keys}

    def _load_before_model(self, model: torch.nn.Module, state_dict: Dict[str, Any]) -> None:
        """Special hook that gets called by the CheckpointConnector *before* the model gets loaded.

        This hook replaces the :meth:`on_load_checkpoint` and :meth:`load_state_dict` callback methods which get called
        after the model has already loaded the weights. For quantization, we need to convert the model first before that
        happens, assuming the previous training used quantization.
        """
        for k, v in state_dict.items():
            setattr(self, k, v)
        self._prepare_model(model)
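
A minimal usage sketch for the ``QuantizationAwareTraining`` callback defined above, assuming a user-defined ``LitModel`` LightningModule (hypothetical) and default observer settings:

    import torch
    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import QuantizationAwareTraining

    qcb = QuantizationAwareTraining(
        qconfig="fbgemm",                    # server-style default qconfig
        observer_type="average",             # MovingAverageMinMaxObserver-based fake-quant
        observer_enabled_stages=("train",),  # calibrate only while training
        quantize_on_fit_end=True,            # convert to the quantized model in `on_fit_end`
    )

    trainer = Trainer(callbacks=[qcb], max_epochs=10)
    # trainer.fit(LitModel())  # LitModel is an assumption, not part of this file
    # the converted model must be saved explicitly after fit, e.g.:
    # torch.save(trainer.lightning_module.state_dict(), "qat_model.pt")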
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/rich_model_summary.py
ADDED
@@ -0,0 +1,109 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Tuple

from pytorch_lightning.callbacks import ModelSummary
from pytorch_lightning.utilities.imports import _RICH_AVAILABLE
from pytorch_lightning.utilities.model_summary import get_human_readable_count

if _RICH_AVAILABLE:
    from rich import get_console
    from rich.table import Table


class RichModelSummary(ModelSummary):
    r"""
    Generates a summary of all layers in a :class:`~pytorch_lightning.core.lightning.LightningModule`
    with `rich text formatting <https://github.com/willmcgugan/rich>`_.

    Install it with pip:

    .. code-block:: bash

        pip install rich

    .. code-block:: python

        from pytorch_lightning import Trainer
        from pytorch_lightning.callbacks import RichModelSummary

        trainer = Trainer(callbacks=RichModelSummary())

    You could also enable ``RichModelSummary`` using the :class:`~pytorch_lightning.callbacks.RichProgressBar`

    .. code-block:: python

        from pytorch_lightning import Trainer
        from pytorch_lightning.callbacks import RichProgressBar

        trainer = Trainer(callbacks=RichProgressBar())

    Args:
        max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the
            layer summary off.

    Raises:
        ModuleNotFoundError:
            If required `rich` package is not installed on the device.
    """

    def __init__(self, max_depth: int = 1) -> None:
        if not _RICH_AVAILABLE:
            raise ModuleNotFoundError(
                "`RichModelSummary` requires `rich` to be installed. Install it by running `pip install -U rich`."
            )
        super().__init__(max_depth)

    @staticmethod
    def summarize(
        summary_data: List[Tuple[str, List[str]]],
        total_parameters: int,
        trainable_parameters: int,
        model_size: float,
    ) -> None:

        console = get_console()

        table = Table(header_style="bold magenta")
        table.add_column(" ", style="dim")
        table.add_column("Name", justify="left", no_wrap=True)
        table.add_column("Type")
        table.add_column("Params", justify="right")

        column_names = list(zip(*summary_data))[0]

        for column_name in ["In sizes", "Out sizes"]:
            if column_name in column_names:
                table.add_column(column_name, justify="right", style="white")

        rows = list(zip(*(arr[1] for arr in summary_data)))
        for row in rows:
            table.add_row(*row)

        console.print(table)

        parameters = []
        for param in [trainable_parameters, total_parameters - trainable_parameters, total_parameters, model_size]:
            parameters.append("{:<{}}".format(get_human_readable_count(int(param)), 10))

        grid = Table.grid(expand=True)
        grid.add_column()
        grid.add_column()

        grid.add_row(f"[bold]Trainable params[/]: {parameters[0]}")
        grid.add_row(f"[bold]Non-trainable params[/]: {parameters[1]}")
        grid.add_row(f"[bold]Total params[/]: {parameters[2]}")
        grid.add_row(f"[bold]Total estimated model params size (MB)[/]: {parameters[3]}")

        console.print(grid)
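
The ``summary_data`` argument of ``RichModelSummary.summarize`` above is column-oriented: each entry is a ``(column header, list of cell values)`` pair. A small self-contained sketch with made-up layer names and parameter counts (requires ``rich`` to be installed):

    from pytorch_lightning.callbacks import RichModelSummary

    summary_data = [
        (" ", ["0", "1"]),  # layer index column
        ("Name", ["encoder", "decoder"]),
        ("Type", ["Linear", "Linear"]),
        ("Params", ["33.6 K", "33.8 K"]),
    ]
    # prints the layer table followed by the parameter-count grid to the console
    RichModelSummary.summarize(
        summary_data, total_parameters=67_400, trainable_parameters=67_400, model_size=0.27
    )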
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/stochastic_weight_avg.py
ADDED
@@ -0,0 +1,280 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
Stochastic Weight Averaging Callback
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
"""
from copy import deepcopy
from typing import Callable, List, Optional, Union

import torch
from torch import nn
from torch.optim.swa_utils import SWALR

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.rank_zero import rank_zero_info, rank_zero_warn
from pytorch_lightning.utilities.types import LRSchedulerConfig

_AVG_FN = Callable[[torch.Tensor, torch.Tensor, torch.LongTensor], torch.FloatTensor]


class StochasticWeightAveraging(Callback):
    def __init__(
        self,
        swa_epoch_start: Union[int, float] = 0.8,
        swa_lrs: Optional[Union[float, List[float]]] = None,
        annealing_epochs: int = 10,
        annealing_strategy: str = "cos",
        avg_fn: Optional[_AVG_FN] = None,
        device: Optional[Union[torch.device, str]] = torch.device("cpu"),
    ):
        r"""

        Implements the Stochastic Weight Averaging (SWA) Callback to average a model.

        Stochastic Weight Averaging was proposed in ``Averaging Weights Leads to
        Wider Optima and Better Generalization`` by Pavel Izmailov, Dmitrii
        Podoprikhin, Timur Garipov, Dmitry Vetrov and Andrew Gordon Wilson
        (UAI 2018).

        This documentation is highly inspired by PyTorch's work on SWA.
        The callback arguments follow the scheme defined in PyTorch's ``swa_utils`` package.

        For a SWA explanation, please take a look
        `here <https://pytorch.org/blog/pytorch-1.6-now-includes-stochastic-weight-averaging>`_.

        .. warning:: ``StochasticWeightAveraging`` is in beta and subject to change.

        .. warning:: ``StochasticWeightAveraging`` is currently not supported for multiple optimizers/schedulers.

        .. warning:: ``StochasticWeightAveraging`` is currently only supported on every epoch.

        See also how to :ref:`enable it directly on the Trainer <advanced/training_tricks:Stochastic Weight Averaging>`

        Arguments:

            swa_epoch_start: If provided as int, the procedure will start from
                the ``swa_epoch_start``-th epoch. If provided as float between 0 and 1,
                the procedure will start from ``int(swa_epoch_start * max_epochs)`` epoch

            swa_lrs: The SWA learning rate to use:

                - ``None``. Use the current learning rate of the optimizer at the time the SWA procedure starts.
                - ``float``. Use this value for all parameter groups of the optimizer.
                - ``List[float]``. A list values for each parameter group of the optimizer.

            annealing_epochs: number of epochs in the annealing phase (default: 10)

            annealing_strategy: Specifies the annealing strategy (default: "cos"):

                - ``"cos"``. For cosine annealing.
                - ``"linear"`` For linear annealing

            avg_fn: the averaging function used to update the parameters;
                the function must take in the current value of the
                :class:`AveragedModel` parameter, the current value of :attr:`model`
                parameter and the number of models already averaged; if None,
                equally weighted average is used (default: ``None``)

            device: if provided, the averaged model will be stored on the ``device``.
                When None is provided, it will infer the `device` from ``pl_module``.
                (default: ``"cpu"``)

        """

        err_msg = "swa_epoch_start should be a >0 integer or a float between 0 and 1."
        if isinstance(swa_epoch_start, int) and swa_epoch_start < 1:
            raise MisconfigurationException(err_msg)
        if isinstance(swa_epoch_start, float) and not (0 <= swa_epoch_start <= 1):
            raise MisconfigurationException(err_msg)

        wrong_type = not isinstance(swa_lrs, (float, list))
        wrong_float = isinstance(swa_lrs, float) and swa_lrs <= 0
        wrong_list = isinstance(swa_lrs, list) and not all(lr > 0 and isinstance(lr, float) for lr in swa_lrs)
        if swa_lrs is not None and (wrong_type or wrong_float or wrong_list):
            raise MisconfigurationException(
                "The `swa_lrs` should be `None`, a positive float, or a list of positive floats"
            )

        if avg_fn is not None and not isinstance(avg_fn, Callable):
            raise MisconfigurationException("The `avg_fn` should be callable.")

        if device is not None and not isinstance(device, (torch.device, str)):
            raise MisconfigurationException(f"device is expected to be a torch.device or a str. Found {device}")

        self._swa_epoch_start = swa_epoch_start
        self._swa_lrs = swa_lrs
        self._annealing_epochs = annealing_epochs
        self._annealing_strategy = annealing_strategy
        self._avg_fn = avg_fn or self.avg_fn
        self._device = device
        self._model_contains_batch_norm = None
        self._average_model = None

    @property
    def swa_start(self) -> int:
        return max(self._swa_epoch_start - 1, 0)  # 0-based

    @property
    def swa_end(self) -> int:
        return self._max_epochs - 1  # 0-based

    @staticmethod
    def pl_module_contains_batch_norm(pl_module: "pl.LightningModule"):
        return any(isinstance(module, nn.modules.batchnorm._BatchNorm) for module in pl_module.modules())

    def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
        # copy the model before moving it to accelerator device.
        with pl_module._prevent_trainer_and_dataloaders_deepcopy():
            self._average_model = deepcopy(pl_module)

    def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"):
        if len(trainer.optimizers) != 1:
            raise MisconfigurationException("SWA currently works with 1 `optimizer`.")

        if len(trainer.lr_scheduler_configs) > 1:
            raise MisconfigurationException("SWA currently not supported for more than 1 `lr_scheduler`.")

        if isinstance(self._swa_epoch_start, float):
            self._swa_epoch_start = int(trainer.max_epochs * self._swa_epoch_start)

        self._model_contains_batch_norm = self.pl_module_contains_batch_norm(pl_module)

        self._max_epochs = trainer.max_epochs
        if self._model_contains_batch_norm:
            # virtually increase max_epochs to perform batch norm update on latest epoch.
            trainer.fit_loop.max_epochs += 1

    def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"):
        if trainer.current_epoch == self.swa_start:
            # move average model to request device.
            self._average_model = self._average_model.to(self._device or pl_module.device)

            optimizer = trainer.optimizers[0]
            if self._swa_lrs is None:
                self._swa_lrs = [param_group["lr"] for param_group in optimizer.param_groups]
            if isinstance(self._swa_lrs, float):
                self._swa_lrs = [self._swa_lrs] * len(optimizer.param_groups)

            for lr, group in zip(self._swa_lrs, optimizer.param_groups):
                group["initial_lr"] = lr

            self._swa_scheduler = SWALR(
                optimizer,
                swa_lr=self._swa_lrs,
                anneal_epochs=self._annealing_epochs,
                anneal_strategy=self._annealing_strategy,
                last_epoch=trainer.max_epochs if self._annealing_strategy == "cos" else -1,
            )
            # We assert that there is only one optimizer on fit start, so know opt_idx is always 0
            default_scheduler_cfg = LRSchedulerConfig(self._swa_scheduler, opt_idx=0)
            assert default_scheduler_cfg.interval == "epoch" and default_scheduler_cfg.frequency == 1

            if trainer.lr_scheduler_configs:
                scheduler_cfg = trainer.lr_scheduler_configs[0]
                if scheduler_cfg.interval != "epoch" or scheduler_cfg.frequency != 1:
                    rank_zero_warn(f"SWA is currently only supported every epoch. Found {scheduler_cfg}")
                rank_zero_info(
                    f"Swapping scheduler `{scheduler_cfg.scheduler.__class__.__name__}`"
                    f" for `{self._swa_scheduler.__class__.__name__}`"
                )
                trainer.lr_scheduler_configs[0] = default_scheduler_cfg
            else:
                trainer.lr_scheduler_configs.append(default_scheduler_cfg)

            self.n_averaged = torch.tensor(0, dtype=torch.long, device=pl_module.device)

        if self.swa_start <= trainer.current_epoch <= self.swa_end:
            self.update_parameters(self._average_model, pl_module, self.n_averaged, self._avg_fn)

        # Note: No > here in case the callback is saved with the model and training continues
        if trainer.current_epoch == self.swa_end + 1:

            # Transfer weights from average model to pl_module
            self.transfer_weights(self._average_model, pl_module)

            # Reset BatchNorm for update
            self.reset_batch_norm_and_save_state(pl_module)

            # There is no need to perform either backward or optimizer.step as we are
            # performing only one pass over the train data-loader to compute activation statistics
            # Therefore, we will virtually increase `num_training_batches` by 1 and skip backward.
            trainer.num_training_batches += 1
            trainer.fit_loop._skip_backward = True
            self._accumulate_grad_batches = trainer.accumulate_grad_batches

            trainer.accumulate_grad_batches = trainer.num_training_batches

    def on_train_epoch_end(self, trainer: "pl.Trainer", *args):
        trainer.fit_loop._skip_backward = False

    def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"):
        # the trainer increases the current epoch before this hook is called
        if self._model_contains_batch_norm and trainer.current_epoch - 1 == self.swa_end + 1:
            # BatchNorm epoch update. Reset state
            trainer.accumulate_grad_batches = self._accumulate_grad_batches
            trainer.num_training_batches -= 1
            trainer.fit_loop.max_epochs -= 1
            self.reset_momenta()
        elif trainer.current_epoch - 1 == self.swa_end:
            # Last SWA epoch. Transfer weights from average model to pl_module
            self.transfer_weights(self._average_model, pl_module)

    @staticmethod
    def transfer_weights(src_pl_module: "pl.LightningModule", dst_pl_module: "pl.LightningModule"):
        for src_param, dst_param in zip(src_pl_module.parameters(), dst_pl_module.parameters()):
            dst_param.detach().copy_(src_param.to(dst_param.device))

    def reset_batch_norm_and_save_state(self, pl_module: "pl.LightningModule"):
        """Adapted from https://github.com/pytorch/pytorch/blob/v1.7.1/torch/optim/swa_utils.py#L140-L154."""
        self.momenta = {}
        for module in pl_module.modules():
            if not isinstance(module, nn.modules.batchnorm._BatchNorm):
                continue
            module.running_mean = torch.zeros_like(
                module.running_mean, device=pl_module.device, dtype=module.running_mean.dtype
            )
            module.running_var = torch.ones_like(
                module.running_var, device=pl_module.device, dtype=module.running_var.dtype
            )
            self.momenta[module] = module.momentum
            module.momentum = None
            module.num_batches_tracked *= 0

    def reset_momenta(self):
        """Adapted from https://github.com/pytorch/pytorch/blob/v1.7.1/torch/optim/swa_utils.py#L164-L165."""
        for bn_module in self.momenta:
            bn_module.momentum = self.momenta[bn_module]

    @staticmethod
    def update_parameters(
        average_model: "pl.LightningModule", model: "pl.LightningModule", n_averaged: torch.LongTensor, avg_fn: _AVG_FN
    ):
        """Adapted from https://github.com/pytorch/pytorch/blob/v1.7.1/torch/optim/swa_utils.py#L104-L112."""
        for p_swa, p_model in zip(average_model.parameters(), model.parameters()):
            device = p_swa.device
            p_swa_ = p_swa.detach()
            p_model_ = p_model.detach().to(device)
src = p_model_ if n_averaged == 0 else avg_fn(p_swa_, p_model_, n_averaged.to(device))
|
| 272 |
+
p_swa_.copy_(src)
|
| 273 |
+
n_averaged += 1
|
| 274 |
+
|
| 275 |
+
@staticmethod
|
| 276 |
+
def avg_fn(
|
| 277 |
+
averaged_model_parameter: torch.Tensor, model_parameter: torch.Tensor, num_averaged: torch.LongTensor
|
| 278 |
+
) -> torch.FloatTensor:
|
| 279 |
+
"""Adapted from https://github.com/pytorch/pytorch/blob/v1.7.1/torch/optim/swa_utils.py#L95-L97."""
|
| 280 |
+
return averaged_model_parameter + (model_parameter - averaged_model_parameter) / (num_averaged + 1)
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/timer.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright The PyTorch Lightning team.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
r"""
|
| 15 |
+
Timer
|
| 16 |
+
^^^^^
|
| 17 |
+
"""
|
| 18 |
+
import logging
|
| 19 |
+
import time
|
| 20 |
+
from datetime import timedelta
|
| 21 |
+
from typing import Any, Dict, Optional, Union
|
| 22 |
+
|
| 23 |
+
import pytorch_lightning as pl
|
| 24 |
+
from pytorch_lightning.callbacks.base import Callback
|
| 25 |
+
from pytorch_lightning.trainer.states import RunningStage
|
| 26 |
+
from pytorch_lightning.utilities import LightningEnum
|
| 27 |
+
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
| 28 |
+
from pytorch_lightning.utilities.rank_zero import rank_zero_info
|
| 29 |
+
|
| 30 |
+
log = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class Interval(LightningEnum):
|
| 34 |
+
step = "step"
|
| 35 |
+
epoch = "epoch"
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class Timer(Callback):
|
| 39 |
+
"""The Timer callback tracks the time spent in the training, validation, and test loops and interrupts the
|
| 40 |
+
Trainer if the given time limit for the training loop is reached.
|
| 41 |
+
|
| 42 |
+
Args:
|
| 43 |
+
duration: A string in the format DD:HH:MM:SS (days, hours, minutes seconds), or a :class:`datetime.timedelta`,
|
| 44 |
+
or a dict containing key-value compatible with :class:`~datetime.timedelta`.
|
| 45 |
+
interval: Determines if the interruption happens on epoch level or mid-epoch.
|
| 46 |
+
Can be either ``"epoch"`` or ``"step"``.
|
| 47 |
+
verbose: Set this to ``False`` to suppress logging messages.
|
| 48 |
+
|
| 49 |
+
Raises:
|
| 50 |
+
MisconfigurationException:
|
| 51 |
+
If ``interval`` is not one of the supported choices.
|
| 52 |
+
|
| 53 |
+
Example::
|
| 54 |
+
from pytorch_lightning import Trainer
|
| 55 |
+
from pytorch_lightning.callbacks import Timer
|
| 56 |
+
|
| 57 |
+
# stop training after 12 hours
|
| 58 |
+
timer = Timer(duration="00:12:00:00")
|
| 59 |
+
|
| 60 |
+
# or provide a datetime.timedelta
|
| 61 |
+
from datetime import timedelta
|
| 62 |
+
timer = Timer(duration=timedelta(weeks=1))
|
| 63 |
+
|
| 64 |
+
# or provide a dictionary
|
| 65 |
+
timer = Timer(duration=dict(weeks=4, days=2))
|
| 66 |
+
|
| 67 |
+
# force training to stop after given time limit
|
| 68 |
+
trainer = Trainer(callbacks=[timer])
|
| 69 |
+
|
| 70 |
+
# query training/validation/test time (in seconds)
|
| 71 |
+
timer.time_elapsed("train")
|
| 72 |
+
timer.start_time("validate")
|
| 73 |
+
timer.end_time("test")
|
| 74 |
+
"""
|
| 75 |
+
|
| 76 |
+
def __init__(
|
| 77 |
+
self,
|
| 78 |
+
duration: Optional[Union[str, timedelta, Dict[str, int]]] = None,
|
| 79 |
+
interval: str = Interval.step,
|
| 80 |
+
verbose: bool = True,
|
| 81 |
+
) -> None:
|
| 82 |
+
super().__init__()
|
| 83 |
+
if isinstance(duration, str):
|
| 84 |
+
dhms = duration.strip().split(":")
|
| 85 |
+
dhms = [int(i) for i in dhms]
|
| 86 |
+
duration = timedelta(days=dhms[0], hours=dhms[1], minutes=dhms[2], seconds=dhms[3])
|
| 87 |
+
if isinstance(duration, dict):
|
| 88 |
+
duration = timedelta(**duration)
|
| 89 |
+
if interval not in set(Interval):
|
| 90 |
+
raise MisconfigurationException(
|
| 91 |
+
f"Unsupported parameter value `Timer(interval={interval})`. Possible choices are:"
|
| 92 |
+
f" {', '.join(set(Interval))}"
|
| 93 |
+
)
|
| 94 |
+
self._duration = duration.total_seconds() if duration is not None else None
|
| 95 |
+
self._interval = interval
|
| 96 |
+
self._verbose = verbose
|
| 97 |
+
self._start_time: Dict[RunningStage, Optional[float]] = {stage: None for stage in RunningStage}
|
| 98 |
+
self._end_time: Dict[RunningStage, Optional[float]] = {stage: None for stage in RunningStage}
|
| 99 |
+
self._offset = 0
|
| 100 |
+
|
| 101 |
+
def start_time(self, stage: str = RunningStage.TRAINING) -> Optional[float]:
|
| 102 |
+
"""Return the start time of a particular stage (in seconds)"""
|
| 103 |
+
stage = RunningStage(stage)
|
| 104 |
+
return self._start_time[stage]
|
| 105 |
+
|
| 106 |
+
def end_time(self, stage: str = RunningStage.TRAINING) -> Optional[float]:
|
| 107 |
+
"""Return the end time of a particular stage (in seconds)"""
|
| 108 |
+
stage = RunningStage(stage)
|
| 109 |
+
return self._end_time[stage]
|
| 110 |
+
|
| 111 |
+
def time_elapsed(self, stage: str = RunningStage.TRAINING) -> float:
|
| 112 |
+
"""Return the time elapsed for a particular stage (in seconds)"""
|
| 113 |
+
start = self.start_time(stage)
|
| 114 |
+
end = self.end_time(stage)
|
| 115 |
+
offset = self._offset if stage == RunningStage.TRAINING else 0
|
| 116 |
+
if start is None:
|
| 117 |
+
return offset
|
| 118 |
+
if end is None:
|
| 119 |
+
return time.monotonic() - start + offset
|
| 120 |
+
return end - start + offset
|
| 121 |
+
|
| 122 |
+
def time_remaining(self, stage: str = RunningStage.TRAINING) -> Optional[float]:
|
| 123 |
+
"""Return the time remaining for a particular stage (in seconds)"""
|
| 124 |
+
if self._duration is not None:
|
| 125 |
+
return self._duration - self.time_elapsed(stage)
|
| 126 |
+
|
| 127 |
+
def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 128 |
+
self._start_time[RunningStage.TRAINING] = time.monotonic()
|
| 129 |
+
|
| 130 |
+
def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 131 |
+
self._end_time[RunningStage.TRAINING] = time.monotonic()
|
| 132 |
+
|
| 133 |
+
def on_validation_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 134 |
+
self._start_time[RunningStage.VALIDATING] = time.monotonic()
|
| 135 |
+
|
| 136 |
+
def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 137 |
+
self._end_time[RunningStage.VALIDATING] = time.monotonic()
|
| 138 |
+
|
| 139 |
+
def on_test_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 140 |
+
self._start_time[RunningStage.TESTING] = time.monotonic()
|
| 141 |
+
|
| 142 |
+
def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 143 |
+
self._end_time[RunningStage.TESTING] = time.monotonic()
|
| 144 |
+
|
| 145 |
+
def on_fit_start(self, trainer: "pl.Trainer", *args: Any, **kwargs: Any) -> None:
|
| 146 |
+
# this checks the time after the state is reloaded, regardless of the interval.
|
| 147 |
+
# this is necessary in case we load a state whose timer is already depleted
|
| 148 |
+
if self._duration is None:
|
| 149 |
+
return
|
| 150 |
+
self._check_time_remaining(trainer)
|
| 151 |
+
|
| 152 |
+
def on_train_batch_end(self, trainer: "pl.Trainer", *args: Any, **kwargs: Any) -> None:
|
| 153 |
+
if self._interval != Interval.step or self._duration is None:
|
| 154 |
+
return
|
| 155 |
+
self._check_time_remaining(trainer)
|
| 156 |
+
|
| 157 |
+
def on_train_epoch_end(self, trainer: "pl.Trainer", *args: Any, **kwargs: Any) -> None:
|
| 158 |
+
if self._interval != Interval.epoch or self._duration is None:
|
| 159 |
+
return
|
| 160 |
+
self._check_time_remaining(trainer)
|
| 161 |
+
|
| 162 |
+
def state_dict(self) -> Dict[str, Any]:
|
| 163 |
+
return {"time_elapsed": {stage.value: self.time_elapsed(stage) for stage in list(RunningStage)}}
|
| 164 |
+
|
| 165 |
+
def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
|
| 166 |
+
time_elapsed = state_dict.get("time_elapsed", {})
|
| 167 |
+
self._offset = time_elapsed.get(RunningStage.TRAINING.value, 0)
|
| 168 |
+
|
| 169 |
+
def _check_time_remaining(self, trainer: "pl.Trainer") -> None:
|
| 170 |
+
assert self._duration is not None
|
| 171 |
+
should_stop = self.time_elapsed() >= self._duration
|
| 172 |
+
should_stop = trainer.strategy.broadcast(should_stop)
|
| 173 |
+
trainer.should_stop = trainer.should_stop or should_stop
|
| 174 |
+
if should_stop and self._verbose:
|
| 175 |
+
elapsed = timedelta(seconds=int(self.time_elapsed(RunningStage.TRAINING)))
|
| 176 |
+
rank_zero_info(f"Time limit reached. Elapsed time is {elapsed}. Signaling Trainer to stop.")
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/callbacks/xla_stats_monitor.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright The PyTorch Lightning team.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
"""
|
| 15 |
+
XLA Stats Monitor
|
| 16 |
+
=================
|
| 17 |
+
|
| 18 |
+
Monitor and logs XLA stats during training.
|
| 19 |
+
|
| 20 |
+
"""
|
| 21 |
+
import time
|
| 22 |
+
|
| 23 |
+
import pytorch_lightning as pl
|
| 24 |
+
from pytorch_lightning.accelerators import TPUAccelerator
|
| 25 |
+
from pytorch_lightning.callbacks.base import Callback
|
| 26 |
+
from pytorch_lightning.utilities import _TPU_AVAILABLE
|
| 27 |
+
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
| 28 |
+
from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info
|
| 29 |
+
|
| 30 |
+
if _TPU_AVAILABLE:
|
| 31 |
+
import torch_xla.core.xla_model as xm
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class XLAStatsMonitor(Callback):
|
| 35 |
+
r"""
|
| 36 |
+
.. deprecated:: v1.5
|
| 37 |
+
The `XLAStatsMonitor` callback was deprecated in v1.5 and will be removed in v1.7.
|
| 38 |
+
Please use the `DeviceStatsMonitor` callback instead.
|
| 39 |
+
|
| 40 |
+
Automatically monitors and logs XLA stats during training stage. ``XLAStatsMonitor`` is a callback and in
|
| 41 |
+
order to use it you need to assign a logger in the ``Trainer``.
|
| 42 |
+
|
| 43 |
+
Args:
|
| 44 |
+
verbose: Set to ``True`` to print average peak and free memory, and epoch time
|
| 45 |
+
every epoch.
|
| 46 |
+
|
| 47 |
+
Raises:
|
| 48 |
+
MisconfigurationException:
|
| 49 |
+
If not running on TPUs, or ``Trainer`` has no logger.
|
| 50 |
+
|
| 51 |
+
Example::
|
| 52 |
+
|
| 53 |
+
>>> from pytorch_lightning import Trainer
|
| 54 |
+
>>> from pytorch_lightning.callbacks import XLAStatsMonitor
|
| 55 |
+
>>> xla_stats = XLAStatsMonitor() # doctest: +SKIP
|
| 56 |
+
>>> trainer = Trainer(callbacks=[xla_stats]) # doctest: +SKIP
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
def __init__(self, verbose: bool = True) -> None:
|
| 60 |
+
super().__init__()
|
| 61 |
+
|
| 62 |
+
rank_zero_deprecation(
|
| 63 |
+
"The `XLAStatsMonitor` callback was deprecated in v1.5 and will be removed in v1.7."
|
| 64 |
+
" Please use the `DeviceStatsMonitor` callback instead."
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
if not _TPU_AVAILABLE:
|
| 68 |
+
raise MisconfigurationException("Cannot use XLAStatsMonitor with TPUs are not available")
|
| 69 |
+
|
| 70 |
+
self._verbose = verbose
|
| 71 |
+
|
| 72 |
+
def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 73 |
+
if not trainer.loggers:
|
| 74 |
+
raise MisconfigurationException("Cannot use XLAStatsMonitor callback with Trainer that has no logger.")
|
| 75 |
+
|
| 76 |
+
if not isinstance(trainer.accelerator, TPUAccelerator):
|
| 77 |
+
raise MisconfigurationException(
|
| 78 |
+
"You are using XLAStatsMonitor but are not running on TPU."
|
| 79 |
+
f" The accelerator is set to {trainer.accelerator.__class__.__name__}."
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
device = trainer.strategy.root_device
|
| 83 |
+
memory_info = xm.get_memory_info(device)
|
| 84 |
+
total_memory = trainer.strategy.reduce(memory_info["kb_total"]) * 0.001
|
| 85 |
+
rank_zero_info(f"Average Total memory: {total_memory:.2f} MB")
|
| 86 |
+
|
| 87 |
+
def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 88 |
+
self._start_time = time.time()
|
| 89 |
+
|
| 90 |
+
def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
|
| 91 |
+
if not trainer.loggers:
|
| 92 |
+
raise MisconfigurationException("Cannot use XLAStatsMonitor callback with Trainer that has no logger.")
|
| 93 |
+
|
| 94 |
+
device = trainer.strategy.root_device
|
| 95 |
+
memory_info = xm.get_memory_info(device)
|
| 96 |
+
epoch_time = time.time() - self._start_time
|
| 97 |
+
|
| 98 |
+
free_memory = memory_info["kb_free"]
|
| 99 |
+
peak_memory = memory_info["kb_total"] - free_memory
|
| 100 |
+
|
| 101 |
+
free_memory = trainer.strategy.reduce(free_memory) * 0.001
|
| 102 |
+
peak_memory = trainer.strategy.reduce(peak_memory) * 0.001
|
| 103 |
+
epoch_time = trainer.strategy.reduce(epoch_time)
|
| 104 |
+
|
| 105 |
+
for logger in trainer.loggers:
|
| 106 |
+
logger.log_metrics(
|
| 107 |
+
{"avg. free memory (MB)": float(free_memory), "avg. peak memory (MB)": float(peak_memory)},
|
| 108 |
+
step=trainer.current_epoch,
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
if self._verbose:
|
| 112 |
+
rank_zero_info(f"Average Epoch time: {epoch_time:.2f} seconds")
|
| 113 |
+
rank_zero_info(f"Average Peak memory: {peak_memory:.2f} MB")
|
| 114 |
+
rank_zero_info(f"Average Free memory: {free_memory:.2f} MB")
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/datamodule.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright The PyTorch Lightning team.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
"""LightningDataModule for loading DataLoaders with ease."""
|
| 15 |
+
from argparse import ArgumentParser, Namespace
|
| 16 |
+
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Union
|
| 17 |
+
|
| 18 |
+
from torch.utils.data import DataLoader, Dataset, IterableDataset
|
| 19 |
+
|
| 20 |
+
from pytorch_lightning.core.hooks import CheckpointHooks, DataHooks
|
| 21 |
+
from pytorch_lightning.core.mixins import HyperparametersMixin
|
| 22 |
+
from pytorch_lightning.utilities import rank_zero_deprecation
|
| 23 |
+
from pytorch_lightning.utilities.argparse import add_argparse_args, from_argparse_args, get_init_arguments_and_types
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class LightningDataModule(CheckpointHooks, DataHooks, HyperparametersMixin):
|
| 27 |
+
"""A DataModule standardizes the training, val, test splits, data preparation and transforms. The main
|
| 28 |
+
advantage is consistent data splits, data preparation and transforms across models.
|
| 29 |
+
|
| 30 |
+
Example::
|
| 31 |
+
|
| 32 |
+
class MyDataModule(LightningDataModule):
|
| 33 |
+
def __init__(self):
|
| 34 |
+
super().__init__()
|
| 35 |
+
def prepare_data(self):
|
| 36 |
+
# download, split, etc...
|
| 37 |
+
# only called on 1 GPU/TPU in distributed
|
| 38 |
+
def setup(self, stage):
|
| 39 |
+
# make assignments here (val/train/test split)
|
| 40 |
+
# called on every process in DDP
|
| 41 |
+
def train_dataloader(self):
|
| 42 |
+
train_split = Dataset(...)
|
| 43 |
+
return DataLoader(train_split)
|
| 44 |
+
def val_dataloader(self):
|
| 45 |
+
val_split = Dataset(...)
|
| 46 |
+
return DataLoader(val_split)
|
| 47 |
+
def test_dataloader(self):
|
| 48 |
+
test_split = Dataset(...)
|
| 49 |
+
return DataLoader(test_split)
|
| 50 |
+
def teardown(self):
|
| 51 |
+
# clean up after fit or test
|
| 52 |
+
# called on every process in DDP
|
| 53 |
+
"""
|
| 54 |
+
|
| 55 |
+
name: str = ...
|
| 56 |
+
|
| 57 |
+
def __init__(self, train_transforms=None, val_transforms=None, test_transforms=None, dims=None):
|
| 58 |
+
super().__init__()
|
| 59 |
+
if train_transforms is not None:
|
| 60 |
+
rank_zero_deprecation(
|
| 61 |
+
"DataModule property `train_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 62 |
+
)
|
| 63 |
+
if val_transforms is not None:
|
| 64 |
+
rank_zero_deprecation(
|
| 65 |
+
"DataModule property `val_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 66 |
+
)
|
| 67 |
+
if test_transforms is not None:
|
| 68 |
+
rank_zero_deprecation(
|
| 69 |
+
"DataModule property `test_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 70 |
+
)
|
| 71 |
+
if dims is not None:
|
| 72 |
+
rank_zero_deprecation("DataModule property `dims` was deprecated in v1.5 and will be removed in v1.7.")
|
| 73 |
+
self._train_transforms = train_transforms
|
| 74 |
+
self._val_transforms = val_transforms
|
| 75 |
+
self._test_transforms = test_transforms
|
| 76 |
+
self._dims = dims if dims is not None else ()
|
| 77 |
+
|
| 78 |
+
# Pointer to the trainer object
|
| 79 |
+
self.trainer = None
|
| 80 |
+
|
| 81 |
+
@property
|
| 82 |
+
def train_transforms(self):
|
| 83 |
+
"""Optional transforms (or collection of transforms) you can apply to train dataset.
|
| 84 |
+
|
| 85 |
+
.. deprecated:: v1.5 Will be removed in v1.7.0.
|
| 86 |
+
"""
|
| 87 |
+
|
| 88 |
+
rank_zero_deprecation(
|
| 89 |
+
"DataModule property `train_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 90 |
+
)
|
| 91 |
+
return self._train_transforms
|
| 92 |
+
|
| 93 |
+
@train_transforms.setter
|
| 94 |
+
def train_transforms(self, t):
|
| 95 |
+
rank_zero_deprecation(
|
| 96 |
+
"DataModule property `train_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 97 |
+
)
|
| 98 |
+
self._train_transforms = t
|
| 99 |
+
|
| 100 |
+
@property
|
| 101 |
+
def val_transforms(self):
|
| 102 |
+
"""Optional transforms (or collection of transforms) you can apply to validation dataset.
|
| 103 |
+
|
| 104 |
+
.. deprecated:: v1.5 Will be removed in v1.7.0.
|
| 105 |
+
"""
|
| 106 |
+
|
| 107 |
+
rank_zero_deprecation(
|
| 108 |
+
"DataModule property `val_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 109 |
+
)
|
| 110 |
+
return self._val_transforms
|
| 111 |
+
|
| 112 |
+
@val_transforms.setter
|
| 113 |
+
def val_transforms(self, t):
|
| 114 |
+
rank_zero_deprecation(
|
| 115 |
+
"DataModule property `val_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 116 |
+
)
|
| 117 |
+
self._val_transforms = t
|
| 118 |
+
|
| 119 |
+
@property
|
| 120 |
+
def test_transforms(self):
|
| 121 |
+
"""Optional transforms (or collection of transforms) you can apply to test dataset.
|
| 122 |
+
|
| 123 |
+
.. deprecated:: v1.5 Will be removed in v1.7.0.
|
| 124 |
+
"""
|
| 125 |
+
|
| 126 |
+
rank_zero_deprecation(
|
| 127 |
+
"DataModule property `test_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 128 |
+
)
|
| 129 |
+
return self._test_transforms
|
| 130 |
+
|
| 131 |
+
@test_transforms.setter
|
| 132 |
+
def test_transforms(self, t):
|
| 133 |
+
rank_zero_deprecation(
|
| 134 |
+
"DataModule property `test_transforms` was deprecated in v1.5 and will be removed in v1.7."
|
| 135 |
+
)
|
| 136 |
+
self._test_transforms = t
|
| 137 |
+
|
| 138 |
+
@property
|
| 139 |
+
def dims(self):
|
| 140 |
+
"""A tuple describing the shape of your data. Extra functionality exposed in ``size``.
|
| 141 |
+
|
| 142 |
+
.. deprecated:: v1.5 Will be removed in v1.7.0.
|
| 143 |
+
"""
|
| 144 |
+
rank_zero_deprecation("DataModule property `dims` was deprecated in v1.5 and will be removed in v1.7.")
|
| 145 |
+
return self._dims
|
| 146 |
+
|
| 147 |
+
@dims.setter
|
| 148 |
+
def dims(self, d):
|
| 149 |
+
rank_zero_deprecation("DataModule property `dims` was deprecated in v1.5 and will be removed in v1.7.")
|
| 150 |
+
self._dims = d
|
| 151 |
+
|
| 152 |
+
def size(self, dim=None) -> Union[Tuple, List[Tuple]]:
|
| 153 |
+
"""Return the dimension of each input either as a tuple or list of tuples. You can index this just as you
|
| 154 |
+
would with a torch tensor.
|
| 155 |
+
|
| 156 |
+
.. deprecated:: v1.5 Will be removed in v1.7.0.
|
| 157 |
+
"""
|
| 158 |
+
rank_zero_deprecation("DataModule property `size` was deprecated in v1.5 and will be removed in v1.7.")
|
| 159 |
+
|
| 160 |
+
if dim is not None:
|
| 161 |
+
return self.dims[dim]
|
| 162 |
+
|
| 163 |
+
return self.dims
|
| 164 |
+
|
| 165 |
+
@classmethod
|
| 166 |
+
def add_argparse_args(cls, parent_parser: ArgumentParser, **kwargs) -> ArgumentParser:
|
| 167 |
+
"""Extends existing argparse by default `LightningDataModule` attributes."""
|
| 168 |
+
return add_argparse_args(cls, parent_parser, **kwargs)
|
| 169 |
+
|
| 170 |
+
@classmethod
|
| 171 |
+
def from_argparse_args(cls, args: Union[Namespace, ArgumentParser], **kwargs):
|
| 172 |
+
"""Create an instance from CLI arguments.
|
| 173 |
+
|
| 174 |
+
Args:
|
| 175 |
+
args: The parser or namespace to take arguments from. Only known arguments will be
|
| 176 |
+
parsed and passed to the :class:`~pytorch_lightning.core.datamodule.LightningDataModule`.
|
| 177 |
+
**kwargs: Additional keyword arguments that may override ones in the parser or namespace.
|
| 178 |
+
These must be valid DataModule arguments.
|
| 179 |
+
|
| 180 |
+
Example::
|
| 181 |
+
|
| 182 |
+
parser = ArgumentParser(add_help=False)
|
| 183 |
+
parser = LightningDataModule.add_argparse_args(parser)
|
| 184 |
+
module = LightningDataModule.from_argparse_args(args)
|
| 185 |
+
"""
|
| 186 |
+
return from_argparse_args(cls, args, **kwargs)
|
| 187 |
+
|
| 188 |
+
@classmethod
|
| 189 |
+
def get_init_arguments_and_types(cls) -> List[Tuple[str, Tuple, Any]]:
|
| 190 |
+
r"""Scans the DataModule signature and returns argument names, types and default values.
|
| 191 |
+
|
| 192 |
+
Returns:
|
| 193 |
+
List with tuples of 3 values:
|
| 194 |
+
(argument name, set with argument types, argument default value).
|
| 195 |
+
"""
|
| 196 |
+
return get_init_arguments_and_types(cls)
|
| 197 |
+
|
| 198 |
+
@classmethod
|
| 199 |
+
def from_datasets(
|
| 200 |
+
cls,
|
| 201 |
+
train_dataset: Optional[Union[Dataset, Sequence[Dataset], Mapping[str, Dataset]]] = None,
|
| 202 |
+
val_dataset: Optional[Union[Dataset, Sequence[Dataset]]] = None,
|
| 203 |
+
test_dataset: Optional[Union[Dataset, Sequence[Dataset]]] = None,
|
| 204 |
+
batch_size: int = 1,
|
| 205 |
+
num_workers: int = 0,
|
| 206 |
+
):
|
| 207 |
+
r"""
|
| 208 |
+
Create an instance from torch.utils.data.Dataset.
|
| 209 |
+
|
| 210 |
+
Args:
|
| 211 |
+
train_dataset: (optional) Dataset to be used for train_dataloader()
|
| 212 |
+
val_dataset: (optional) Dataset or list of Dataset to be used for val_dataloader()
|
| 213 |
+
test_dataset: (optional) Dataset or list of Dataset to be used for test_dataloader()
|
| 214 |
+
batch_size: Batch size to use for each dataloader. Default is 1.
|
| 215 |
+
num_workers: Number of subprocesses to use for data loading. 0 means that the
|
| 216 |
+
data will be loaded in the main process. Number of CPUs available.
|
| 217 |
+
|
| 218 |
+
"""
|
| 219 |
+
|
| 220 |
+
def dataloader(ds: Dataset, shuffle: bool = False) -> DataLoader:
|
| 221 |
+
shuffle &= not isinstance(ds, IterableDataset)
|
| 222 |
+
return DataLoader(ds, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=True)
|
| 223 |
+
|
| 224 |
+
def train_dataloader():
|
| 225 |
+
if isinstance(train_dataset, Mapping):
|
| 226 |
+
return {key: dataloader(ds, shuffle=True) for key, ds in train_dataset.items()}
|
| 227 |
+
if isinstance(train_dataset, Sequence):
|
| 228 |
+
return [dataloader(ds, shuffle=True) for ds in train_dataset]
|
| 229 |
+
return dataloader(train_dataset, shuffle=True)
|
| 230 |
+
|
| 231 |
+
def val_dataloader():
|
| 232 |
+
if isinstance(val_dataset, Sequence):
|
| 233 |
+
return [dataloader(ds) for ds in val_dataset]
|
| 234 |
+
return dataloader(val_dataset)
|
| 235 |
+
|
| 236 |
+
def test_dataloader():
|
| 237 |
+
if isinstance(test_dataset, Sequence):
|
| 238 |
+
return [dataloader(ds) for ds in test_dataset]
|
| 239 |
+
return dataloader(test_dataset)
|
| 240 |
+
|
| 241 |
+
datamodule = cls()
|
| 242 |
+
if train_dataset is not None:
|
| 243 |
+
datamodule.train_dataloader = train_dataloader
|
| 244 |
+
if val_dataset is not None:
|
| 245 |
+
datamodule.val_dataloader = val_dataloader
|
| 246 |
+
if test_dataset is not None:
|
| 247 |
+
datamodule.test_dataloader = test_dataloader
|
| 248 |
+
return datamodule
|
| 249 |
+
|
| 250 |
+
def state_dict(self) -> Dict[str, Any]:
|
| 251 |
+
"""Called when saving a checkpoint, implement to generate and save datamodule state.
|
| 252 |
+
|
| 253 |
+
Returns:
|
| 254 |
+
A dictionary containing datamodule state.
|
| 255 |
+
"""
|
| 256 |
+
return {}
|
| 257 |
+
|
| 258 |
+
def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
|
| 259 |
+
"""Called when loading a checkpoint, implement to reload datamodule state given datamodule state_dict.
|
| 260 |
+
|
| 261 |
+
Args:
|
| 262 |
+
state_dict: the datamodule state returned by ``state_dict``.
|
| 263 |
+
"""
|
| 264 |
+
pass
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/decorators.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright The PyTorch Lightning team.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_warn
|
| 15 |
+
|
| 16 |
+
rank_zero_deprecation(
|
| 17 |
+
"Using `pytorch_lightning.core.decorators.parameter_validation` is deprecated in v1.5, "
|
| 18 |
+
"and will be removed in v1.7. It has been replaced by automatic parameters tying with "
|
| 19 |
+
"`pytorch_lightning.utilities.params_tying.set_shared_parameters`"
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
from functools import wraps # noqa: E402
|
| 23 |
+
from typing import Callable # noqa: E402
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def parameter_validation(fn: Callable) -> Callable:
|
| 27 |
+
"""Validates that the module parameter lengths match after moving to the device. It is useful when tying
|
| 28 |
+
weights on TPU's.
|
| 29 |
+
|
| 30 |
+
Args:
|
| 31 |
+
fn: ``model_to_device`` method
|
| 32 |
+
|
| 33 |
+
Note:
|
| 34 |
+
TPU's require weights to be tied/shared after moving the module to the device.
|
| 35 |
+
Failure to do this results in the initialization of new weights which are not tied.
|
| 36 |
+
To overcome this issue, weights should be tied using the ``on_post_move_to_device`` model hook
|
| 37 |
+
which is called after the module has been moved to the device.
|
| 38 |
+
|
| 39 |
+
See Also:
|
| 40 |
+
- `XLA Documentation <https://github.com/pytorch/xla/blob/master/TROUBLESHOOTING.md#xla-tensor-quirks>`_
|
| 41 |
+
"""
|
| 42 |
+
|
| 43 |
+
@wraps(fn)
|
| 44 |
+
def inner_fn(self, *args, **kwargs):
|
| 45 |
+
pre_layer_count = len(list(self.model.parameters()))
|
| 46 |
+
module = fn(self, *args, **kwargs)
|
| 47 |
+
self.model.on_post_move_to_device()
|
| 48 |
+
post_layer_count = len(list(self.model.parameters()))
|
| 49 |
+
|
| 50 |
+
if not pre_layer_count == post_layer_count:
|
| 51 |
+
rank_zero_warn(
|
| 52 |
+
"The model layers do not match after moving to the target device."
|
| 53 |
+
" If your model employs weight sharing on TPU,"
|
| 54 |
+
" please tie your weights using the `on_post_move_to_device` model hook.\n"
|
| 55 |
+
f"Layer count: [Before: {pre_layer_count} After: {post_layer_count}]"
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
return module
|
| 59 |
+
|
| 60 |
+
return inner_fn
|
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/hooks.py
ADDED
|
@@ -0,0 +1,828 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright The PyTorch Lightning team.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
"""Various hooks to be used in the Lightning code."""
|
| 15 |
+
|
| 16 |
+
from typing import Any, Dict, List, Optional
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from torch.optim.optimizer import Optimizer
|
| 20 |
+
|
| 21 |
+
from pytorch_lightning.utilities import move_data_to_device
|
| 22 |
+
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
| 23 |
+
from pytorch_lightning.utilities.types import EVAL_DATALOADERS, STEP_OUTPUT, TRAIN_DATALOADERS
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class ModelHooks:
|
| 27 |
+
"""Hooks to be used in LightningModule."""
|
| 28 |
+
|
| 29 |
+
def on_fit_start(self) -> None:
|
| 30 |
+
"""Called at the very beginning of fit.
|
| 31 |
+
|
| 32 |
+
If on DDP it is called on every process
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
def on_fit_end(self) -> None:
|
| 36 |
+
"""Called at the very end of fit.
|
| 37 |
+
|
| 38 |
+
If on DDP it is called on every process
|
| 39 |
+
"""
|
| 40 |
+
|
| 41 |
+
def on_train_start(self) -> None:
|
| 42 |
+
"""Called at the beginning of training after sanity check."""
|
| 43 |
+
|
| 44 |
+
def on_train_end(self) -> None:
|
| 45 |
+
"""Called at the end of training before logger experiment is closed."""
|
| 46 |
+
|
| 47 |
+
def on_validation_start(self) -> None:
|
| 48 |
+
"""Called at the beginning of validation."""
|
| 49 |
+
|
| 50 |
+
def on_validation_end(self) -> None:
|
| 51 |
+
"""Called at the end of validation."""
|
| 52 |
+
|
| 53 |
+
def on_test_start(self) -> None:
|
| 54 |
+
"""Called at the beginning of testing."""
|
| 55 |
+
|
| 56 |
+
def on_test_end(self) -> None:
|
| 57 |
+
"""Called at the end of testing."""
|
| 58 |
+
|
| 59 |
+
def on_predict_start(self) -> None:
|
| 60 |
+
"""Called at the beginning of predicting."""
|
| 61 |
+
|
| 62 |
+
def on_predict_end(self) -> None:
|
| 63 |
+
"""Called at the end of predicting."""
|
| 64 |
+
|
| 65 |
+
def on_pretrain_routine_start(self) -> None:
|
| 66 |
+
"""Called at the beginning of the pretrain routine (between fit and train start).
|
| 67 |
+
|
| 68 |
+
- fit
|
| 69 |
+
- pretrain_routine start
|
| 70 |
+
- pretrain_routine end
|
| 71 |
+
- training_start
|
| 72 |
+
|
| 73 |
+
.. deprecated:: v1.6
|
| 74 |
+
:meth:`on_pretrain_routine_start` has been deprecated in v1.6 and will be removed in v1.8.
|
| 75 |
+
Use ``on_fit_start`` instead.
|
| 76 |
+
"""
|
| 77 |
+
|
| 78 |
+
def on_pretrain_routine_end(self) -> None:
|
| 79 |
+
"""Called at the end of the pretrain routine (between fit and train start).
|
| 80 |
+
|
| 81 |
+
- fit
|
| 82 |
+
- pretrain_routine start
|
| 83 |
+
- pretrain_routine end
|
| 84 |
+
- training_start
|
| 85 |
+
|
| 86 |
+
.. deprecated:: v1.6
|
| 87 |
+
:meth:`on_pretrain_routine_end` has been deprecated in v1.6 and will be removed in v1.8.
|
| 88 |
+
Use ``on_fit_start`` instead.
|
| 89 |
+
"""
|
| 90 |
+
|
| 91 |
+
def on_train_batch_start(self, batch: Any, batch_idx: int, unused: int = 0) -> Optional[int]:
|
| 92 |
+
"""Called in the training loop before anything happens for that batch.
|
| 93 |
+
|
| 94 |
+
If you return -1 here, you will skip training for the rest of the current epoch.
|
| 95 |
+
|
| 96 |
+
Args:
|
| 97 |
+
batch: The batched data as it is returned by the training DataLoader.
|
| 98 |
+
batch_idx: the index of the batch
|
| 99 |
+
unused: Deprecated argument. Will be removed in v1.7.
|
| 100 |
+
"""
|
| 101 |
+
|
| 102 |
+
def on_train_batch_end(self, outputs: STEP_OUTPUT, batch: Any, batch_idx: int, unused: int = 0) -> None:
|
| 103 |
+
"""Called in the training loop after the batch.
|
| 104 |
+
|
| 105 |
+
Args:
|
| 106 |
+
outputs: The outputs of training_step_end(training_step(x))
|
| 107 |
+
batch: The batched data as it is returned by the training DataLoader.
|
| 108 |
+
batch_idx: the index of the batch
|
| 109 |
+
unused: Deprecated argument. Will be removed in v1.7.
|
| 110 |
+
"""
|
| 111 |
+
|
| 112 |
+
def on_validation_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None:
|
| 113 |
+
"""Called in the validation loop before anything happens for that batch.
|
| 114 |
+
|
| 115 |
+
Args:
|
| 116 |
+
batch: The batched data as it is returned by the validation DataLoader.
|
| 117 |
+
batch_idx: the index of the batch
|
| 118 |
+
dataloader_idx: the index of the dataloader
|
| 119 |
+
"""
|
| 120 |
+
|
| 121 |
+
def on_validation_batch_end(
|
| 122 |
+
self, outputs: Optional[STEP_OUTPUT], batch: Any, batch_idx: int, dataloader_idx: int
|
| 123 |
+
) -> None:
|
| 124 |
+
"""Called in the validation loop after the batch.
|
| 125 |
+
|
| 126 |
+
Args:
|
| 127 |
+
outputs: The outputs of validation_step_end(validation_step(x))
|
| 128 |
+
batch: The batched data as it is returned by the validation DataLoader.
|
| 129 |
+
batch_idx: the index of the batch
|
| 130 |
+
dataloader_idx: the index of the dataloader
|
| 131 |
+
"""
|
| 132 |
+
|
| 133 |
+
def on_test_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None:
|
| 134 |
+
"""Called in the test loop before anything happens for that batch.
|
| 135 |
+
|
| 136 |
+
Args:
|
| 137 |
+
batch: The batched data as it is returned by the test DataLoader.
|
| 138 |
+
batch_idx: the index of the batch
|
| 139 |
+
dataloader_idx: the index of the dataloader
|
| 140 |
+
"""
|
| 141 |
+
|
| 142 |
+
def on_test_batch_end(
|
| 143 |
+
self, outputs: Optional[STEP_OUTPUT], batch: Any, batch_idx: int, dataloader_idx: int
|
| 144 |
+
) -> None:
|
| 145 |
+
"""Called in the test loop after the batch.
|
| 146 |
+
|
| 147 |
+
Args:
|
| 148 |
+
outputs: The outputs of test_step_end(test_step(x))
|
| 149 |
+
batch: The batched data as it is returned by the test DataLoader.
|
| 150 |
+
batch_idx: the index of the batch
|
| 151 |
+
dataloader_idx: the index of the dataloader
|
| 152 |
+
"""
|
| 153 |
+
|
| 154 |
+
def on_predict_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None:
|
| 155 |
+
"""Called in the predict loop before anything happens for that batch.
|
| 156 |
+
|
| 157 |
+
Args:
|
| 158 |
+
batch: The batched data as it is returned by the test DataLoader.
|
| 159 |
+
batch_idx: the index of the batch
|
| 160 |
+
dataloader_idx: the index of the dataloader
|
| 161 |
+
"""
|
| 162 |
+
|
| 163 |
+
def on_predict_batch_end(self, outputs: Optional[Any], batch: Any, batch_idx: int, dataloader_idx: int) -> None:
|
| 164 |
+
"""Called in the predict loop after the batch.
|
| 165 |
+
|
| 166 |
+
Args:
|
| 167 |
+
outputs: The outputs of predict_step_end(test_step(x))
|
| 168 |
+
batch: The batched data as it is returned by the test DataLoader.
|
| 169 |
+
batch_idx: the index of the batch
|
| 170 |
+
dataloader_idx: the index of the dataloader
|
| 171 |
+
"""
|
| 172 |
+
|
| 173 |
+
def on_validation_model_eval(self) -> None:
|
| 174 |
+
"""Sets the model to eval during the val loop."""
|
| 175 |
+
self.trainer.model.eval()
|
| 176 |
+
|
| 177 |
+
def on_validation_model_train(self) -> None:
|
| 178 |
+
"""Sets the model to train during the val loop."""
|
| 179 |
+
self.trainer.model.train()
|
| 180 |
+
|
| 181 |
+
def on_test_model_train(self) -> None:
|
| 182 |
+
"""Sets the model to train during the test loop."""
|
| 183 |
+
self.trainer.model.train()
|
| 184 |
+
|
| 185 |
+
def on_test_model_eval(self) -> None:
|
| 186 |
+
"""Sets the model to eval during the test loop."""
|
| 187 |
+
self.trainer.model.eval()
|
| 188 |
+
|
| 189 |
+
def on_predict_model_eval(self) -> None:
|
| 190 |
+
"""Sets the model to eval during the predict loop."""
|
| 191 |
+
self.trainer.model.eval()
|
| 192 |
+
|
| 193 |
+
def on_epoch_start(self) -> None:
|
| 194 |
+
"""Called when either of train/val/test epoch begins.
|
| 195 |
+
|
| 196 |
+
.. deprecated:: v1.6
|
| 197 |
+
:meth:`on_epoch_start` has been deprecated in v1.6 and will be removed in v1.8.
|
| 198 |
+
Use ``on_<train/validation/test>_epoch_start`` instead.
|
| 199 |
+
"""
|
| 200 |
+
|
| 201 |
+
def on_epoch_end(self) -> None:
|
| 202 |
+
"""Called when either of train/val/test epoch ends.
|
| 203 |
+
|
| 204 |
+
.. deprecated:: v1.6
|
| 205 |
+
:meth:`on_epoch_end` has been deprecated in v1.6 and will be removed in v1.8.
|
| 206 |
+
Use ``on_<train/validation/test>_epoch_end`` instead.
|
| 207 |
+
"""
|
| 208 |
+
|
| 209 |
+
def on_train_epoch_start(self) -> None:
|
| 210 |
+
"""Called in the training loop at the very beginning of the epoch."""
|
| 211 |
+
|
| 212 |
+
def on_train_epoch_end(self) -> None:
|
| 213 |
+
"""Called in the training loop at the very end of the epoch.
|
| 214 |
+
|
| 215 |
+
To access all batch outputs at the end of the epoch, either:
|
| 216 |
+
|
| 217 |
+
1. Implement `training_epoch_end` in the LightningModule OR
|
| 218 |
+
2. Cache data across steps on the attribute(s) of the `LightningModule` and access them in this hook
|
| 219 |
+
"""
|
| 220 |
+
|
| 221 |
+
def on_validation_epoch_start(self) -> None:
|
| 222 |
+
"""Called in the validation loop at the very beginning of the epoch."""
|
| 223 |
+
|
| 224 |
+
def on_validation_epoch_end(self) -> None:
|
| 225 |
+
"""Called in the validation loop at the very end of the epoch."""
|
| 226 |
+
|
| 227 |
+
def on_test_epoch_start(self) -> None:
|
| 228 |
+
"""Called in the test loop at the very beginning of the epoch."""
|
| 229 |
+
|
| 230 |
+
def on_test_epoch_end(self) -> None:
|
| 231 |
+
"""Called in the test loop at the very end of the epoch."""
|
| 232 |
+
|
| 233 |
+
def on_predict_epoch_start(self) -> None:
|
| 234 |
+
"""Called at the beginning of predicting."""
|
| 235 |
+
|
| 236 |
+
def on_predict_epoch_end(self, results: List[Any]) -> None:
|
| 237 |
+
"""Called at the end of predicting."""
|
| 238 |
+
|
| 239 |
+
def on_before_zero_grad(self, optimizer: Optimizer) -> None:
|
| 240 |
+
"""Called after ``training_step()`` and before ``optimizer.zero_grad()``.
|
| 241 |
+
|
| 242 |
+
Called in the training loop after taking an optimizer step and before zeroing grads.
|
| 243 |
+
Good place to inspect weight information with weights updated.
|
| 244 |
+
|
| 245 |
+
This is where it is called::
|
| 246 |
+
|
| 247 |
+
for optimizer in optimizers:
|
| 248 |
+
out = training_step(...)
|
| 249 |
+
|
| 250 |
+
model.on_before_zero_grad(optimizer) # < ---- called here
|
| 251 |
+
optimizer.zero_grad()
|
| 252 |
+
|
| 253 |
+
backward()
|
| 254 |
+
|
| 255 |
+
Args:
|
| 256 |
+
optimizer: The optimizer for which grads should be zeroed.
|
| 257 |
+
"""
|
| 258 |
+
|
| 259 |
+
def on_before_backward(self, loss: torch.Tensor) -> None:
|
| 260 |
+
"""Called before ``loss.backward()``.
|
| 261 |
+
|
| 262 |
+
Args:
|
| 263 |
+
loss: Loss divided by number of batches for gradient accumulation and scaled if using native AMP.
|
| 264 |
+
"""
|
| 265 |
+
pass
|
| 266 |
+
|
| 267 |
+
def on_after_backward(self) -> None:
|
| 268 |
+
"""Called after ``loss.backward()`` and before optimizers are stepped.
|
| 269 |
+
|
| 270 |
+
Note:
|
| 271 |
+
If using native AMP, the gradients will not be unscaled at this point.
|
| 272 |
+
Use the ``on_before_optimizer_step`` if you need the unscaled gradients.
|
| 273 |
+
"""
|
| 274 |
+
|
| 275 |
+
def on_before_optimizer_step(self, optimizer: Optimizer, optimizer_idx: int) -> None:
|
| 276 |
+
"""Called before ``optimizer.step()``.
|
| 277 |
+
|
| 278 |
+
If using gradient accumulation, the hook is called once the gradients have been accumulated.
|
| 279 |
+
See: :paramref:`~pytorch_lightning.trainer.Trainer.accumulate_grad_batches`.
|
| 280 |
+
|
| 281 |
+
If using native AMP, the loss will be unscaled before calling this hook.
|
| 282 |
+
See these `docs <https://pytorch.org/docs/stable/notes/amp_examples.html#working-with-unscaled-gradients>`__
|
| 283 |
+
for more information on the scaling of gradients.
|
| 284 |
+
|
| 285 |
+
If clipping gradients, the gradients will not have been clipped yet.
|
| 286 |
+
|
| 287 |
+
Args:
|
| 288 |
+
optimizer: Current optimizer being used.
|
| 289 |
+
optimizer_idx: Index of the current optimizer being used.
|
| 290 |
+
|
| 291 |
+
Example::
|
| 292 |
+
|
| 293 |
+
def on_before_optimizer_step(self, optimizer, optimizer_idx):
|
| 294 |
+
# example to inspect gradient information in tensorboard
|
| 295 |
+
if self.trainer.global_step % 25 == 0: # don't make the tf file huge
|
| 296 |
+
for k, v in self.named_parameters():
|
| 297 |
+
self.logger.experiment.add_histogram(
|
| 298 |
+
tag=k, values=v.grad, global_step=self.trainer.global_step
|
| 299 |
+
)
|
| 300 |
+
"""
|
| 301 |
+
|
| 302 |
+
def on_post_move_to_device(self) -> None:
|
| 303 |
+
"""Called in the ``parameter_validation`` decorator after
|
| 304 |
+
:meth:`~pytorch_lightning.core.LightningModule.to` is called. This is a good place to tie weights between
|
| 305 |
+
modules after moving them to a device. Can be used when training models with weight sharing properties on
|
| 306 |
+
TPU.
|
| 307 |
+
|
| 308 |
+
Addresses the handling of shared weights on TPU:
|
| 309 |
+
https://github.com/pytorch/xla/blob/master/TROUBLESHOOTING.md#xla-tensor-quirks
|
| 310 |
+
|
| 311 |
+
Example::
|
| 312 |
+
|
| 313 |
+
def on_post_move_to_device(self):
|
| 314 |
+
self.decoder.weight = self.encoder.weight
|
| 315 |
+
"""
|
| 316 |
+
|
| 317 |
+
def configure_sharded_model(self) -> None:
|
| 318 |
+
"""Hook to create modules in a distributed aware context. This is useful for when using sharded plugins,
|
| 319 |
+
where we'd like to shard the model instantly, which is useful for extremely large models which can save
|
| 320 |
+
memory and initialization time.
|
| 321 |
+
|
| 322 |
+
This hook is called during each of fit/val/test/predict stages in the same process, so ensure that
|
| 323 |
+
implementation of this hook is idempotent.
|
| 324 |
+
"""
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
class DataHooks:
    """Hooks to be used for data related stuff."""

    def __init__(self) -> None:
        """
        Attributes:
            prepare_data_per_node:
                If True, each LOCAL_RANK=0 will call ``prepare_data``.
                Otherwise only NODE_RANK=0, LOCAL_RANK=0 will prepare data.
            allow_zero_length_dataloader_with_multiple_devices:
                If True, a dataloader with zero length within the local rank is allowed.
                Default value is False.
        """
        super().__init__()
        self.prepare_data_per_node: bool = True
        self.allow_zero_length_dataloader_with_multiple_devices: bool = False

    def prepare_data(self) -> None:
        """Use this to download and prepare data. Downloading and saving data with multiple processes (distributed
        settings) will result in corrupted data. Lightning ensures this method is called only within a single
        process, so you can safely add your downloading logic within.

        .. warning:: DO NOT set state to the model (use ``setup`` instead)
            since this is NOT called on every device

        Example::

            def prepare_data(self):
                # good
                download_data()
                tokenize()
                etc()

                # bad
                self.split = data_split
                self.some_state = some_other_state()

        In DDP ``prepare_data`` can be called in two ways (using Trainer(prepare_data_per_node)):

        1. Once per node. This is the default and is only called on LOCAL_RANK=0.
        2. Once in total. Only called on GLOBAL_RANK=0.

        See :ref:`prepare_data_per_node<common/lightning_module:prepare_data_per_node>`.

        Example::

            # DEFAULT
            # called once per node on LOCAL_RANK=0 of that node
            Trainer(prepare_data_per_node=True)

            # call on GLOBAL_RANK=0 (great for shared file systems)
            Trainer(prepare_data_per_node=False)

        This is called before requesting the dataloaders:

        .. code-block:: python

            model.prepare_data()
            initialize_distributed()
            model.setup(stage)
            model.train_dataloader()
            model.val_dataloader()
            model.test_dataloader()
        """

    def setup(self, stage: Optional[str] = None) -> None:
        """Called at the beginning of fit (train + validate), validate, test, or predict. This is a good hook when
        you need to build models dynamically or adjust something about them. This hook is called on every process
        when using DDP.

        Args:
            stage: either ``'fit'``, ``'validate'``, ``'test'``, or ``'predict'``

        Example::

            class LitModel(...):
                def __init__(self):
                    self.l1 = None

                def prepare_data(self):
                    download_data()
                    tokenize()

                    # don't do this
                    self.something = something_else()

                def setup(self, stage):
                    data = load_data(...)
                    self.l1 = nn.Linear(28, data.num_classes)
        """

    def teardown(self, stage: Optional[str] = None) -> None:
        """Called at the end of fit (train + validate), validate, test, or predict.

        Args:
            stage: either ``'fit'``, ``'validate'``, ``'test'``, or ``'predict'``
        """

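    # Illustrative sketch (not part of the original file): `teardown` is the natural place to release
    # per-stage resources created in `setup`; the temporary-directory attribute is hypothetical and
    # `tempfile`/`shutil` are assumed to be imported.
    #
    #     def setup(self, stage=None):
    #         self._tmp_dir = tempfile.mkdtemp()
    #
    #     def teardown(self, stage=None):
    #         shutil.rmtree(self._tmp_dir, ignore_errors=True)
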
    def train_dataloader(self) -> TRAIN_DATALOADERS:
        """Implement one or more PyTorch DataLoaders for training.

        Return:
            A collection of :class:`torch.utils.data.DataLoader` specifying training samples.
            In the case of multiple dataloaders, please see this :ref:`section <multiple-dataloaders>`.

        The dataloader you return will not be reloaded unless you set
        :paramref:`~pytorch_lightning.trainer.Trainer.reload_dataloaders_every_n_epochs` to
        a positive integer.

        For data processing use the following pattern:

        - download in :meth:`prepare_data`
        - process and split in :meth:`setup`

        However, the above are only necessary for distributed processing.

        .. warning:: do not assign state in prepare_data

        - :meth:`~pytorch_lightning.trainer.trainer.Trainer.fit`
        - :meth:`prepare_data`
        - :meth:`setup`

        Note:
            Lightning adds the correct sampler for distributed and arbitrary hardware.
            There is no need to set it yourself.

        Example::

            # single dataloader
            def train_dataloader(self):
                transform = transforms.Compose([transforms.ToTensor(),
                                                transforms.Normalize((0.5,), (1.0,))])
                dataset = MNIST(root='/path/to/mnist/', train=True, transform=transform,
                                download=True)
                loader = torch.utils.data.DataLoader(
                    dataset=dataset,
                    batch_size=self.batch_size,
                    shuffle=True
                )
                return loader

            # multiple dataloaders, return as list
            def train_dataloader(self):
                mnist = MNIST(...)
                cifar = CIFAR(...)
                mnist_loader = torch.utils.data.DataLoader(
                    dataset=mnist, batch_size=self.batch_size, shuffle=True
                )
                cifar_loader = torch.utils.data.DataLoader(
                    dataset=cifar, batch_size=self.batch_size, shuffle=True
                )
                # each batch will be a list of tensors: [batch_mnist, batch_cifar]
                return [mnist_loader, cifar_loader]

            # multiple dataloaders, return as dict
            def train_dataloader(self):
                mnist = MNIST(...)
                cifar = CIFAR(...)
                mnist_loader = torch.utils.data.DataLoader(
                    dataset=mnist, batch_size=self.batch_size, shuffle=True
                )
                cifar_loader = torch.utils.data.DataLoader(
                    dataset=cifar, batch_size=self.batch_size, shuffle=True
                )
                # each batch will be a dict of tensors: {'mnist': batch_mnist, 'cifar': batch_cifar}
                return {'mnist': mnist_loader, 'cifar': cifar_loader}
        """
        raise MisconfigurationException("`train_dataloader` must be implemented to be used with the Lightning Trainer")

def test_dataloader(self) -> EVAL_DATALOADERS:
|
| 497 |
+
r"""
|
| 498 |
+
Implement one or multiple PyTorch DataLoaders for testing.
|
| 499 |
+
|
| 500 |
+
For data processing use the following pattern:
|
| 501 |
+
|
| 502 |
+
- download in :meth:`prepare_data`
|
| 503 |
+
- process and split in :meth:`setup`
|
| 504 |
+
|
| 505 |
+
However, the above are only necessary for distributed processing.
|
| 506 |
+
|
| 507 |
+
.. warning:: do not assign state in prepare_data
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
- :meth:`~pytorch_lightning.trainer.trainer.Trainer.test`
|
| 511 |
+
- :meth:`prepare_data`
|
| 512 |
+
- :meth:`setup`
|
| 513 |
+
|
| 514 |
+
Note:
|
| 515 |
+
Lightning adds the correct sampler for distributed and arbitrary hardware.
|
| 516 |
+
There is no need to set it yourself.
|
| 517 |
+
|
| 518 |
+
Return:
|
| 519 |
+
A :class:`torch.utils.data.DataLoader` or a sequence of them specifying testing samples.
|
| 520 |
+
|
| 521 |
+
Example::
|
| 522 |
+
|
| 523 |
+
def test_dataloader(self):
|
| 524 |
+
transform = transforms.Compose([transforms.ToTensor(),
|
| 525 |
+
transforms.Normalize((0.5,), (1.0,))])
|
| 526 |
+
dataset = MNIST(root='/path/to/mnist/', train=False, transform=transform,
|
| 527 |
+
download=True)
|
| 528 |
+
loader = torch.utils.data.DataLoader(
|
| 529 |
+
dataset=dataset,
|
| 530 |
+
batch_size=self.batch_size,
|
| 531 |
+
shuffle=False
|
| 532 |
+
)
|
| 533 |
+
|
| 534 |
+
return loader
|
| 535 |
+
|
| 536 |
+
# can also return multiple dataloaders
|
| 537 |
+
def test_dataloader(self):
|
| 538 |
+
return [loader_a, loader_b, ..., loader_n]
|
| 539 |
+
|
| 540 |
+
Note:
|
| 541 |
+
If you don't need a test dataset and a :meth:`test_step`, you don't need to implement
|
| 542 |
+
this method.
|
| 543 |
+
|
| 544 |
+
Note:
|
| 545 |
+
In the case where you return multiple test dataloaders, the :meth:`test_step`
|
| 546 |
+
will have an argument ``dataloader_idx`` which matches the order here.
|
| 547 |
+
"""
|
| 548 |
+
raise MisconfigurationException("`test_dataloader` must be implemented to be used with the Lightning Trainer")
|
| 549 |
+
|
| 550 |
+
def val_dataloader(self) -> EVAL_DATALOADERS:
|
| 551 |
+
r"""
|
| 552 |
+
Implement one or multiple PyTorch DataLoaders for validation.
|
| 553 |
+
|
| 554 |
+
The dataloader you return will not be reloaded unless you set
|
| 555 |
+
:paramref:`~pytorch_lightning.trainer.Trainer.reload_dataloaders_every_n_epochs` to
|
| 556 |
+
a positive integer.
|
| 557 |
+
|
| 558 |
+
It's recommended that all data downloads and preparation happen in :meth:`prepare_data`.
|
| 559 |
+
|
| 560 |
+
- :meth:`~pytorch_lightning.trainer.trainer.Trainer.fit`
|
| 561 |
+
- :meth:`~pytorch_lightning.trainer.trainer.Trainer.validate`
|
| 562 |
+
- :meth:`prepare_data`
|
| 563 |
+
- :meth:`setup`
|
| 564 |
+
|
| 565 |
+
Note:
|
| 566 |
+
Lightning adds the correct sampler for distributed and arbitrary hardware
|
| 567 |
+
There is no need to set it yourself.
|
| 568 |
+
|
| 569 |
+
Return:
|
| 570 |
+
A :class:`torch.utils.data.DataLoader` or a sequence of them specifying validation samples.
|
| 571 |
+
|
| 572 |
+
Examples::
|
| 573 |
+
|
| 574 |
+
def val_dataloader(self):
|
| 575 |
+
transform = transforms.Compose([transforms.ToTensor(),
|
| 576 |
+
transforms.Normalize((0.5,), (1.0,))])
|
| 577 |
+
dataset = MNIST(root='/path/to/mnist/', train=False,
|
| 578 |
+
transform=transform, download=True)
|
| 579 |
+
loader = torch.utils.data.DataLoader(
|
| 580 |
+
dataset=dataset,
|
| 581 |
+
batch_size=self.batch_size,
|
| 582 |
+
shuffle=False
|
| 583 |
+
)
|
| 584 |
+
|
| 585 |
+
return loader
|
| 586 |
+
|
| 587 |
+
# can also return multiple dataloaders
|
| 588 |
+
def val_dataloader(self):
|
| 589 |
+
return [loader_a, loader_b, ..., loader_n]
|
| 590 |
+
|
| 591 |
+
Note:
|
| 592 |
+
If you don't need a validation dataset and a :meth:`validation_step`, you don't need to
|
| 593 |
+
implement this method.
|
| 594 |
+
|
| 595 |
+
Note:
|
| 596 |
+
In the case where you return multiple validation dataloaders, the :meth:`validation_step`
|
| 597 |
+
will have an argument ``dataloader_idx`` which matches the order here.
|
| 598 |
+
"""
|
| 599 |
+
raise MisconfigurationException("`val_dataloader` must be implemented to be used with the Lightning Trainer")
|
| 600 |
+
|
| 601 |
+
def predict_dataloader(self) -> EVAL_DATALOADERS:
|
| 602 |
+
r"""
|
| 603 |
+
Implement one or multiple PyTorch DataLoaders for prediction.
|
| 604 |
+
|
| 605 |
+
It's recommended that all data downloads and preparation happen in :meth:`prepare_data`.
|
| 606 |
+
|
| 607 |
+
- :meth:`~pytorch_lightning.trainer.trainer.Trainer.predict`
|
| 608 |
+
- :meth:`prepare_data`
|
| 609 |
+
- :meth:`setup`
|
| 610 |
+
|
| 611 |
+
Note:
|
| 612 |
+
Lightning adds the correct sampler for distributed and arbitrary hardware
|
| 613 |
+
There is no need to set it yourself.
|
| 614 |
+
|
| 615 |
+
Return:
|
| 616 |
+
A :class:`torch.utils.data.DataLoader` or a sequence of them specifying prediction samples.
|
| 617 |
+
|
| 618 |
+
Note:
|
| 619 |
+
In the case where you return multiple prediction dataloaders, the :meth:`predict_step`
|
| 620 |
+
will have an argument ``dataloader_idx`` which matches the order here.
|
| 621 |
+
"""
|
| 622 |
+
raise MisconfigurationException(
|
| 623 |
+
"`predict_dataloader` must be implemented to be used with the Lightning Trainer"
|
| 624 |
+
)
|
| 625 |
+
|
| 626 |
+
    def on_train_dataloader(self) -> None:
        """Called before requesting the train dataloader.

        .. deprecated:: v1.5
            :meth:`on_train_dataloader` is deprecated and will be removed in v1.7.0.
            Please use :meth:`train_dataloader()` directly.
        """

    def on_val_dataloader(self) -> None:
        """Called before requesting the val dataloader.

        .. deprecated:: v1.5
            :meth:`on_val_dataloader` is deprecated and will be removed in v1.7.0.
            Please use :meth:`val_dataloader()` directly.
        """

    def on_test_dataloader(self) -> None:
        """Called before requesting the test dataloader.

        .. deprecated:: v1.5
            :meth:`on_test_dataloader` is deprecated and will be removed in v1.7.0.
            Please use :meth:`test_dataloader()` directly.
        """

    def on_predict_dataloader(self) -> None:
        """Called before requesting the predict dataloader.

        .. deprecated:: v1.5
            :meth:`on_predict_dataloader` is deprecated and will be removed in v1.7.0.
            Please use :meth:`predict_dataloader()` directly.
        """

    def transfer_batch_to_device(self, batch: Any, device: torch.device, dataloader_idx: int) -> Any:
        """Override this hook if your :class:`~torch.utils.data.DataLoader` returns tensors wrapped in a custom
        data structure.

        The data types listed below (and any arbitrary nesting of them) are supported out of the box:

        - :class:`torch.Tensor` or anything that implements `.to(...)`
        - :class:`list`
        - :class:`dict`
        - :class:`tuple`
        - :class:`torchtext.data.batch.Batch`

        For anything else, you need to define how the data is moved to the target device (CPU, GPU, TPU, ...).

        Note:
            This hook should only transfer the data and not modify it, nor should it move the data to
            any other device than the one passed in as argument (unless you know what you are doing).
            To check the current state of execution of this hook you can use
            ``self.trainer.training/testing/validating/predicting`` so that you can
            add different logic as per your requirement.

        Note:
            This hook only runs on single GPU training and DDP (no data-parallel).
            Data-Parallel support will come in the near future.

        Args:
            batch: A batch of data that needs to be transferred to a new device.
            device: The target device as defined in PyTorch.
            dataloader_idx: The index of the dataloader to which the batch belongs.

        Returns:
            A reference to the data on the new device.

        Example::

            def transfer_batch_to_device(self, batch, device, dataloader_idx):
                if isinstance(batch, CustomBatch):
                    # move all tensors in your custom data structure to the device
                    batch.samples = batch.samples.to(device)
                    batch.targets = batch.targets.to(device)
                elif dataloader_idx == 0:
                    # skip device transfer for the first dataloader or anything you wish
                    pass
                else:
                    batch = super().transfer_batch_to_device(batch, device, dataloader_idx)
                return batch

        Raises:
            MisconfigurationException:
                If using data-parallel, ``Trainer(strategy='dp')``.

        See Also:
            - :meth:`move_data_to_device`
            - :meth:`apply_to_collection`
        """
        return move_data_to_device(batch, device)

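    # Illustrative sketch (not part of the original file): as noted above, anything that implements
    # `.to(...)` is moved automatically, so a custom batch type often removes the need to override
    # this hook at all; `CustomBatch` is hypothetical.
    #
    #     class CustomBatch:
    #         def __init__(self, samples, targets):
    #             self.samples, self.targets = samples, targets
    #
    #         def to(self, device):
    #             self.samples = self.samples.to(device)
    #             self.targets = self.targets.to(device)
    #             return self
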
def on_before_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
|
| 716 |
+
"""Override to alter or apply batch augmentations to your batch before it is transferred to the device.
|
| 717 |
+
|
| 718 |
+
Note:
|
| 719 |
+
To check the current state of execution of this hook you can use
|
| 720 |
+
``self.trainer.training/testing/validating/predicting`` so that you can
|
| 721 |
+
add different logic as per your requirement.
|
| 722 |
+
|
| 723 |
+
Note:
|
| 724 |
+
This hook only runs on single GPU training and DDP (no data-parallel).
|
| 725 |
+
Data-Parallel support will come in near future.
|
| 726 |
+
|
| 727 |
+
Args:
|
| 728 |
+
batch: A batch of data that needs to be altered or augmented.
|
| 729 |
+
dataloader_idx: The index of the dataloader to which the batch belongs.
|
| 730 |
+
|
| 731 |
+
Returns:
|
| 732 |
+
A batch of data
|
| 733 |
+
|
| 734 |
+
Example::
|
| 735 |
+
|
| 736 |
+
def on_before_batch_transfer(self, batch, dataloader_idx):
|
| 737 |
+
batch['x'] = transforms(batch['x'])
|
| 738 |
+
return batch
|
| 739 |
+
|
| 740 |
+
Raises:
|
| 741 |
+
MisconfigurationException:
|
| 742 |
+
If using data-parallel, ``Trainer(strategy='dp')``.
|
| 743 |
+
|
| 744 |
+
See Also:
|
| 745 |
+
- :meth:`on_after_batch_transfer`
|
| 746 |
+
- :meth:`transfer_batch_to_device`
|
| 747 |
+
"""
|
| 748 |
+
return batch
|
| 749 |
+
|
| 750 |
+
def on_after_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
|
| 751 |
+
"""Override to alter or apply batch augmentations to your batch after it is transferred to the device.
|
| 752 |
+
|
| 753 |
+
Note:
|
| 754 |
+
To check the current state of execution of this hook you can use
|
| 755 |
+
``self.trainer.training/testing/validating/predicting`` so that you can
|
| 756 |
+
add different logic as per your requirement.
|
| 757 |
+
|
| 758 |
+
Note:
|
| 759 |
+
This hook only runs on single GPU training and DDP (no data-parallel).
|
| 760 |
+
Data-Parallel support will come in near future.
|
| 761 |
+
|
| 762 |
+
Args:
|
| 763 |
+
batch: A batch of data that needs to be altered or augmented.
|
| 764 |
+
dataloader_idx: The index of the dataloader to which the batch belongs.
|
| 765 |
+
|
| 766 |
+
Returns:
|
| 767 |
+
A batch of data
|
| 768 |
+
|
| 769 |
+
Example::
|
| 770 |
+
|
| 771 |
+
def on_after_batch_transfer(self, batch, dataloader_idx):
|
| 772 |
+
batch['x'] = gpu_transforms(batch['x'])
|
| 773 |
+
return batch
|
| 774 |
+
|
| 775 |
+
Raises:
|
| 776 |
+
MisconfigurationException:
|
| 777 |
+
If using data-parallel, ``Trainer(strategy='dp')``.
|
| 778 |
+
|
| 779 |
+
See Also:
|
| 780 |
+
- :meth:`on_before_batch_transfer`
|
| 781 |
+
- :meth:`transfer_batch_to_device`
|
| 782 |
+
"""
|
| 783 |
+
return batch
|
| 784 |
+
|
| 785 |
+
|
| 786 |
+
class CheckpointHooks:
    """Hooks to be used with Checkpointing."""

    def on_load_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
        r"""
        Called by Lightning to restore your model.
        If you saved something with :meth:`on_save_checkpoint` this is your chance to restore this.

        Args:
            checkpoint: Loaded checkpoint

        Example::

            def on_load_checkpoint(self, checkpoint):
                # 99% of the time you don't need to implement this method
                self.something_cool_i_want_to_save = checkpoint['something_cool_i_want_to_save']

        Note:
            Lightning auto-restores global step, epoch, and train state including amp scaling.
            There is no need for you to restore anything regarding training.
        """

    def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
        r"""
        Called by Lightning when saving a checkpoint to give you a chance to store anything
        else you might want to save.

        Args:
            checkpoint: The full checkpoint dictionary before it gets dumped to a file.
                Implementations of this hook can insert additional data into this dictionary.

        Example::

            def on_save_checkpoint(self, checkpoint):
                # 99% of use cases you don't need to implement this method
                checkpoint['something_cool_i_want_to_save'] = my_cool_picklable_object

        Note:
            Lightning saves all aspects of training (epoch, global step, etc...)
            including amp scaling.
            There is no need for you to store anything about training.
        """
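# Illustrative sketch (not part of the original file): a minimal module wiring several of the hooks
# defined above together; the layer sizes, dataset and metric choices are hypothetical.
#
#     import torch
#     import pytorch_lightning as pl
#
#     class BoringModel(pl.LightningModule):
#         def __init__(self):
#             super().__init__()
#             self.layer = torch.nn.Linear(32, 2)
#
#         def prepare_data(self):
#             pass  # download once per node (or once in total, see prepare_data_per_node)
#
#         def setup(self, stage=None):
#             self.train_set = torch.utils.data.TensorDataset(
#                 torch.randn(64, 32), torch.randint(0, 2, (64,))
#             )
#
#         def train_dataloader(self):
#             return torch.utils.data.DataLoader(self.train_set, batch_size=8, shuffle=True)
#
#         def training_step(self, batch, batch_idx):
#             x, y = batch
#             return torch.nn.functional.cross_entropy(self.layer(x), y)
#
#         def configure_optimizers(self):
#             return torch.optim.SGD(self.parameters(), lr=0.1)
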
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py
ADDED
|
@@ -0,0 +1,409 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from contextlib import contextmanager
from dataclasses import fields
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
from weakref import proxy

import torch
from torch import optim
from torch.optim import Optimizer

import pytorch_lightning as pl
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.model_helpers import is_overridden
from pytorch_lightning.utilities.rank_zero import rank_zero_warn
from pytorch_lightning.utilities.types import _Stateful, LRSchedulerConfig, LRSchedulerTypeTuple, ReduceLROnPlateau

def do_nothing_closure() -> None:
    return


class LightningOptimizer:
    """This class is used to wrap the user optimizers and properly handle the backward and optimizer_step logic
    across accelerators, AMP and accumulate_grad_batches."""

    def __init__(self, optimizer: Optimizer):
        # copy most of the `Optimizer` methods into this instance. `__del__` is skipped in case the optimizer has
        # implemented custom logic which we would not want to call on destruction of the `LightningOptimizer`
        self.__dict__ = {k: v for k, v in optimizer.__dict__.items() if k not in ("step", "__del__")}

        # For Horovod
        if hasattr(optimizer, "skip_synchronize"):
            self.__class__ = type(
                "Lightning" + optimizer.__class__.__name__, (self.__class__, optimizer.__class__.__bases__[0]), {}
            )
            self.skip_synchronize = optimizer.skip_synchronize
            self.synchronize = optimizer.synchronize
        else:
            self.__class__ = type("Lightning" + optimizer.__class__.__name__, (self.__class__, optimizer.__class__), {})

        self._optimizer = optimizer
        self._strategy: Optional[pl.strategies.Strategy] = None
        self._optimizer_idx = 0
        # to inject logic around the optimizer step, particularly useful with manual optimization
        self._on_before_step = do_nothing_closure
        self._on_after_step = do_nothing_closure

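# Illustrative sketch (not part of the original file): the dynamic `type(...)` call above makes the
# wrapper an instance of both `LightningOptimizer` and the wrapped optimizer class, so `isinstance`
# checks written against the raw optimizer keep working.
#
#     import torch
#     opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
#     lightning_opt = LightningOptimizer(opt)
#     assert isinstance(lightning_opt, LightningOptimizer)
#     assert isinstance(lightning_opt, torch.optim.SGD)
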
@property
|
| 61 |
+
def optimizer(self) -> Optimizer:
|
| 62 |
+
return self._optimizer
|
| 63 |
+
|
| 64 |
+
@classmethod
|
| 65 |
+
def _to_lightning_optimizer(
|
| 66 |
+
cls, optimizer: Union[Optimizer, "LightningOptimizer"], strategy: "pl.strategies.Strategy", opt_idx: int
|
| 67 |
+
) -> "LightningOptimizer":
|
| 68 |
+
if isinstance(optimizer, LightningOptimizer):
|
| 69 |
+
# the user could return a `LightningOptimizer` from `configure_optimizers`, see test:
|
| 70 |
+
# tests/core/test_lightning_optimizer.py::test_lightning_optimizer[False]
|
| 71 |
+
lightning_optimizer = optimizer
|
| 72 |
+
else:
|
| 73 |
+
lightning_optimizer = cls(optimizer)
|
| 74 |
+
lightning_optimizer._strategy = proxy(strategy)
|
| 75 |
+
lightning_optimizer._optimizer_idx = opt_idx
|
| 76 |
+
return lightning_optimizer
|
| 77 |
+
|
| 78 |
+
@contextmanager
|
| 79 |
+
def toggle_model(self, sync_grad: bool = True) -> Generator[None, None, None]:
|
| 80 |
+
"""This function is just a helper for advanced users.
|
| 81 |
+
|
| 82 |
+
Considering the current optimizer as A and all other optimizers as B.
|
| 83 |
+
Toggling means all parameters from B exclusive to A will have ``requires_grad`` set to False.
|
| 84 |
+
|
| 85 |
+
When performing gradient accumulation, there is no need to perform grad synchronization
|
| 86 |
+
during the accumulation phase.
|
| 87 |
+
Setting `sync_grad` to False will block this synchronization and improve performance.
|
| 88 |
+
"""
|
| 89 |
+
# local import here to avoid circular import
|
| 90 |
+
from pytorch_lightning.loops.utilities import _block_parallel_sync_behavior
|
| 91 |
+
|
| 92 |
+
assert self._strategy is not None
|
| 93 |
+
lightning_module = self._strategy.lightning_module
|
| 94 |
+
assert lightning_module is not None
|
| 95 |
+
with _block_parallel_sync_behavior(self._strategy, block=(not sync_grad)):
|
| 96 |
+
lightning_module.toggle_optimizer(self, self._optimizer_idx)
|
| 97 |
+
yield
|
| 98 |
+
lightning_module.untoggle_optimizer(self._optimizer_idx)
|
| 99 |
+
|
| 100 |
+
def step(self, closure: Optional[Callable[[], Any]] = None, **kwargs: Any) -> Any:
|
| 101 |
+
"""Performs a single optimization step (parameter update).
|
| 102 |
+
|
| 103 |
+
Args:
|
| 104 |
+
closure: An optional optimizer closure.
|
| 105 |
+
kwargs: Any additional arguments to the ``optimizer.step()`` call.
|
| 106 |
+
|
| 107 |
+
Returns:
|
| 108 |
+
The output from the step call, which is generally the output of the closure execution.
|
| 109 |
+
|
| 110 |
+
Example::
|
| 111 |
+
|
| 112 |
+
# Scenario for a GAN using manual optimization
|
| 113 |
+
def training_step(...):
|
| 114 |
+
opt_gen, opt_dis = self.optimizers()
|
| 115 |
+
|
| 116 |
+
...
|
| 117 |
+
|
| 118 |
+
# compute generator loss
|
| 119 |
+
loss_gen = self.compute_generator_loss(...)
|
| 120 |
+
# zero_grad needs to be called before backward
|
| 121 |
+
opt_gen.zero_grad()
|
| 122 |
+
self.manual_backward(loss_gen)
|
| 123 |
+
opt_gen.step()
|
| 124 |
+
|
| 125 |
+
# compute discriminator loss
|
| 126 |
+
loss_dis = self.compute_discriminator_loss(...)
|
| 127 |
+
|
| 128 |
+
# zero_grad needs to be called before backward
|
| 129 |
+
opt_dis.zero_grad()
|
| 130 |
+
self.manual_backward(loss_dis)
|
| 131 |
+
opt_dis.step()
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
# A more advanced example
|
| 135 |
+
def training_step(self, batch, batch_idx, ...):
|
| 136 |
+
opt_gen, opt_dis = self.optimizers()
|
| 137 |
+
|
| 138 |
+
...
|
| 139 |
+
accumulated_grad_batches = batch_idx % 2 == 0
|
| 140 |
+
|
| 141 |
+
# compute generator loss
|
| 142 |
+
def closure_gen():
|
| 143 |
+
loss_gen = self.compute_generator_loss(...)
|
| 144 |
+
self.manual_backward(loss_gen)
|
| 145 |
+
if accumulated_grad_batches:
|
| 146 |
+
opt_gen.zero_grad()
|
| 147 |
+
|
| 148 |
+
with opt_gen.toggle_model(sync_grad=accumulated_grad_batches):
|
| 149 |
+
opt_gen.step(closure=closure_gen)
|
| 150 |
+
|
| 151 |
+
def closure_dis():
|
| 152 |
+
loss_dis = self.compute_discriminator_loss(...)
|
| 153 |
+
self.manual_backward(loss_dis)
|
| 154 |
+
if accumulated_grad_batches:
|
| 155 |
+
opt_dis.zero_grad()
|
| 156 |
+
|
| 157 |
+
with opt_dis.toggle_model(sync_grad=accumulated_grad_batches):
|
| 158 |
+
opt_dis.step(closure=closure_dis)
|
| 159 |
+
"""
|
| 160 |
+
self._on_before_step()
|
| 161 |
+
|
| 162 |
+
if closure is None:
|
| 163 |
+
closure = do_nothing_closure
|
| 164 |
+
elif not callable(closure):
|
| 165 |
+
raise MisconfigurationException("When `optimizer.step(closure)` is called, the closure should be callable")
|
| 166 |
+
|
| 167 |
+
assert self._strategy is not None
|
| 168 |
+
step_output = self._strategy.optimizer_step(self._optimizer, self._optimizer_idx, closure, **kwargs)
|
| 169 |
+
|
| 170 |
+
self._on_after_step()
|
| 171 |
+
|
| 172 |
+
return step_output
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def _init_optimizers_and_lr_schedulers(
    model: "pl.LightningModule",
) -> Tuple[List[Optimizer], List[LRSchedulerConfig], List[int]]:
    """Calls `LightningModule.configure_optimizers` and parses and validates the output."""
    assert model.trainer is not None
    optim_conf = model.trainer._call_lightning_module_hook("configure_optimizers", pl_module=model)

    if optim_conf is None:
        rank_zero_warn(
            "`LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer",
        )
        optim_conf = _MockOptimizer()

    optimizers, lr_schedulers, optimizer_frequencies, monitor = _configure_optimizers(optim_conf)
    lr_scheduler_configs = (
        _configure_schedulers_automatic_opt(lr_schedulers, monitor)
        if model.automatic_optimization
        else _configure_schedulers_manual_opt(lr_schedulers)
    )
    _set_scheduler_opt_idx(optimizers, lr_scheduler_configs)
    _validate_scheduler_api(lr_scheduler_configs, model)
    return optimizers, lr_scheduler_configs, optimizer_frequencies

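# Illustrative sketch (not part of the original file): the `configure_optimizers` return formats that
# the parsing below understands; the optimizers, schedulers and monitored metric name are hypothetical.
#
#     def configure_optimizers(self):
#         opt = torch.optim.Adam(self.parameters(), lr=1e-3)
#         sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt)
#         # a single optimizer ...
#         return opt
#         # ... or optimizers plus schedulers as two lists ...
#         return [opt], [torch.optim.lr_scheduler.StepLR(opt, step_size=10)]
#         # ... or a dict, which is required when a ReduceLROnPlateau needs a monitor
#         return {"optimizer": opt, "lr_scheduler": {"scheduler": sched, "monitor": "val_loss"}}
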
def _configure_optimizers(
|
| 200 |
+
optim_conf: Union[Dict[str, Any], List, Optimizer, Tuple]
|
| 201 |
+
) -> Tuple[List, List, List, Optional[str]]:
|
| 202 |
+
optimizers, lr_schedulers, optimizer_frequencies = [], [], []
|
| 203 |
+
monitor = None
|
| 204 |
+
|
| 205 |
+
# single output, single optimizer
|
| 206 |
+
if isinstance(optim_conf, Optimizer):
|
| 207 |
+
optimizers = [optim_conf]
|
| 208 |
+
# two lists, optimizer + lr schedulers
|
| 209 |
+
elif (
|
| 210 |
+
isinstance(optim_conf, (list, tuple))
|
| 211 |
+
and len(optim_conf) == 2
|
| 212 |
+
and isinstance(optim_conf[0], list)
|
| 213 |
+
and all(isinstance(opt, Optimizer) for opt in optim_conf[0])
|
| 214 |
+
):
|
| 215 |
+
opt, sch = optim_conf
|
| 216 |
+
optimizers = opt
|
| 217 |
+
lr_schedulers = sch if isinstance(sch, list) else [sch]
|
| 218 |
+
# single dictionary
|
| 219 |
+
elif isinstance(optim_conf, dict):
|
| 220 |
+
_validate_optim_conf(optim_conf)
|
| 221 |
+
optimizers = [optim_conf["optimizer"]]
|
| 222 |
+
monitor = optim_conf.get("monitor", None)
|
| 223 |
+
lr_schedulers = [optim_conf["lr_scheduler"]] if "lr_scheduler" in optim_conf else []
|
| 224 |
+
# multiple dictionaries
|
| 225 |
+
elif isinstance(optim_conf, (list, tuple)) and all(isinstance(d, dict) for d in optim_conf):
|
| 226 |
+
for opt_dict in optim_conf:
|
| 227 |
+
_validate_optim_conf(opt_dict)
|
| 228 |
+
optimizers = [opt_dict["optimizer"] for opt_dict in optim_conf]
|
| 229 |
+
scheduler_dict = (
|
| 230 |
+
lambda scheduler, opt_idx: dict(scheduler, opt_idx=opt_idx)
|
| 231 |
+
if isinstance(scheduler, dict)
|
| 232 |
+
else {"scheduler": scheduler, "opt_idx": opt_idx}
|
| 233 |
+
)
|
| 234 |
+
|
| 235 |
+
lr_schedulers = [
|
| 236 |
+
scheduler_dict(opt_dict["lr_scheduler"], opt_idx)
|
| 237 |
+
for opt_idx, opt_dict in enumerate(optim_conf)
|
| 238 |
+
if "lr_scheduler" in opt_dict
|
| 239 |
+
]
|
| 240 |
+
optimizer_frequencies = [
|
| 241 |
+
opt_dict["frequency"] for opt_dict in optim_conf if opt_dict.get("frequency", None) is not None
|
| 242 |
+
]
|
| 243 |
+
# assert that if frequencies are present, they are given for all optimizers
|
| 244 |
+
if optimizer_frequencies and len(optimizer_frequencies) != len(optimizers):
|
| 245 |
+
raise ValueError("A frequency must be given to each optimizer.")
|
| 246 |
+
# single list or tuple, multiple optimizer
|
| 247 |
+
elif isinstance(optim_conf, (list, tuple)) and all(isinstance(opt, Optimizer) for opt in optim_conf):
|
| 248 |
+
optimizers = list(optim_conf)
|
| 249 |
+
# unknown configuration
|
| 250 |
+
else:
|
| 251 |
+
raise MisconfigurationException(
|
| 252 |
+
"Unknown configuration for model optimizers."
|
| 253 |
+
" Output from `model.configure_optimizers()` should be one of:\n"
|
| 254 |
+
" * `Optimizer`\n"
|
| 255 |
+
" * [`Optimizer`]\n"
|
| 256 |
+
" * ([`Optimizer`], [`_LRScheduler`])\n"
|
| 257 |
+
' * {"optimizer": `Optimizer`, (optional) "lr_scheduler": `_LRScheduler`}\n'
|
| 258 |
+
' * A list of the previously described dict format, with an optional "frequency" key (int)'
|
| 259 |
+
)
|
| 260 |
+
return optimizers, lr_schedulers, optimizer_frequencies, monitor
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def _configure_schedulers_automatic_opt(schedulers: list, monitor: Optional[str]) -> List[LRSchedulerConfig]:
|
| 264 |
+
"""Convert each scheduler into `LRSchedulerConfig` with relevant information, when using automatic
|
| 265 |
+
optimization."""
|
| 266 |
+
lr_scheduler_configs = []
|
| 267 |
+
for scheduler in schedulers:
|
| 268 |
+
if isinstance(scheduler, dict):
|
| 269 |
+
# check provided keys
|
| 270 |
+
supported_keys = {field.name for field in fields(LRSchedulerConfig)}
|
| 271 |
+
extra_keys = scheduler.keys() - supported_keys
|
| 272 |
+
if extra_keys:
|
| 273 |
+
rank_zero_warn(
|
| 274 |
+
f"Found unsupported keys in the lr scheduler dict: {extra_keys}."
|
| 275 |
+
" HINT: remove them from the output of `configure_optimizers`.",
|
| 276 |
+
category=RuntimeWarning,
|
| 277 |
+
)
|
| 278 |
+
scheduler = {k: v for k, v in scheduler.items() if k in supported_keys}
|
| 279 |
+
if "scheduler" not in scheduler:
|
| 280 |
+
raise MisconfigurationException(
|
| 281 |
+
'The lr scheduler dict must have the key "scheduler" with its item being an lr scheduler'
|
| 282 |
+
)
|
| 283 |
+
if "interval" in scheduler and scheduler["interval"] not in ("step", "epoch"):
|
| 284 |
+
raise MisconfigurationException(
|
| 285 |
+
'The "interval" key in lr scheduler dict must be "step" or "epoch"'
|
| 286 |
+
f' but is "{scheduler["interval"]}"'
|
| 287 |
+
)
|
| 288 |
+
scheduler["reduce_on_plateau"] = isinstance(scheduler["scheduler"], optim.lr_scheduler.ReduceLROnPlateau)
|
| 289 |
+
if scheduler["reduce_on_plateau"] and scheduler.get("monitor", None) is None:
|
| 290 |
+
raise MisconfigurationException(
|
| 291 |
+
"The lr scheduler dict must include a monitor when a `ReduceLROnPlateau` scheduler is used."
|
| 292 |
+
' For example: {"optimizer": optimizer, "lr_scheduler":'
|
| 293 |
+
' {"scheduler": scheduler, "monitor": "your_loss"}}'
|
| 294 |
+
)
|
| 295 |
+
is_one_cycle = isinstance(scheduler["scheduler"], optim.lr_scheduler.OneCycleLR)
|
| 296 |
+
if is_one_cycle and scheduler.get("interval", "epoch") == "epoch":
|
| 297 |
+
rank_zero_warn(
|
| 298 |
+
"A `OneCycleLR` scheduler is using 'interval': 'epoch'."
|
| 299 |
+
" Are you sure you didn't mean 'interval': 'step'?",
|
| 300 |
+
category=RuntimeWarning,
|
| 301 |
+
)
|
| 302 |
+
config = LRSchedulerConfig(**scheduler)
|
| 303 |
+
elif isinstance(scheduler, ReduceLROnPlateau):
|
| 304 |
+
if monitor is None:
|
| 305 |
+
raise MisconfigurationException(
|
| 306 |
+
"`configure_optimizers` must include a monitor when a `ReduceLROnPlateau`"
|
| 307 |
+
" scheduler is used. For example:"
|
| 308 |
+
' {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "metric_to_track"}'
|
| 309 |
+
)
|
| 310 |
+
config = LRSchedulerConfig(scheduler, reduce_on_plateau=True, monitor=monitor)
|
| 311 |
+
else:
|
| 312 |
+
config = LRSchedulerConfig(scheduler)
|
| 313 |
+
lr_scheduler_configs.append(config)
|
| 314 |
+
return lr_scheduler_configs
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
def _configure_schedulers_manual_opt(schedulers: list) -> List[LRSchedulerConfig]:
|
| 318 |
+
"""Convert each scheduler into `LRSchedulerConfig` structure with relevant information, when using manual
|
| 319 |
+
optimization."""
|
| 320 |
+
lr_scheduler_configs = []
|
| 321 |
+
for scheduler in schedulers:
|
| 322 |
+
if isinstance(scheduler, dict):
|
| 323 |
+
invalid_keys = {"interval", "frequency", "reduce_on_plateau", "monitor", "strict"}
|
| 324 |
+
keys_to_warn = [k for k in scheduler.keys() if k in invalid_keys]
|
| 325 |
+
|
| 326 |
+
if keys_to_warn:
|
| 327 |
+
rank_zero_warn(
|
| 328 |
+
f"The lr scheduler dict contains the key(s) {keys_to_warn}, but the keys will be ignored."
|
| 329 |
+
" You need to call `lr_scheduler.step()` manually in manual optimization.",
|
| 330 |
+
category=RuntimeWarning,
|
| 331 |
+
)
|
| 332 |
+
|
| 333 |
+
config = LRSchedulerConfig(**{key: scheduler[key] for key in scheduler if key not in invalid_keys})
|
| 334 |
+
else:
|
| 335 |
+
config = LRSchedulerConfig(scheduler)
|
| 336 |
+
lr_scheduler_configs.append(config)
|
| 337 |
+
return lr_scheduler_configs
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
def _validate_scheduler_api(lr_scheduler_configs: List[LRSchedulerConfig], model: "pl.LightningModule") -> None:
|
| 341 |
+
for config in lr_scheduler_configs:
|
| 342 |
+
scheduler = config.scheduler
|
| 343 |
+
if not isinstance(scheduler, _Stateful):
|
| 344 |
+
raise TypeError(
|
| 345 |
+
f"The provided lr scheduler `{scheduler.__class__.__name__}` is invalid."
|
| 346 |
+
" It should have `state_dict` and `load_state_dict` methods defined."
|
| 347 |
+
)
|
| 348 |
+
|
| 349 |
+
if not isinstance(scheduler, LRSchedulerTypeTuple) and not is_overridden("lr_scheduler_step", model):
|
| 350 |
+
raise MisconfigurationException(
|
| 351 |
+
f"The provided lr scheduler `{scheduler.__class__.__name__}` doesn't follow PyTorch's LRScheduler"
|
| 352 |
+
" API. You should override the `LightningModule.lr_scheduler_step` hook with your own logic if"
|
| 353 |
+
" you are using a custom LR scheduler."
|
| 354 |
+
)
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def _set_scheduler_opt_idx(optimizers: List[Optimizer], lr_scheduler_configs: List[LRSchedulerConfig]) -> None:
|
| 358 |
+
for config in lr_scheduler_configs:
|
| 359 |
+
|
| 360 |
+
for opt_idx, opt in enumerate(optimizers):
|
| 361 |
+
if config.scheduler.optimizer is opt:
|
| 362 |
+
if config.opt_idx is not None and config.opt_idx != opt_idx:
|
| 363 |
+
raise MisconfigurationException(
|
| 364 |
+
"`opt_idx` set inside scheduler config does not match with the index"
|
| 365 |
+
" of the respective optimizer returned from `configure_optimizers`."
|
| 366 |
+
)
|
| 367 |
+
|
| 368 |
+
config.opt_idx = opt_idx
|
| 369 |
+
break
|
| 370 |
+
else:
|
| 371 |
+
raise MisconfigurationException(
|
| 372 |
+
"Some schedulers are attached with an optimizer that wasn't returned from `configure_optimizers`."
|
| 373 |
+
)
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def _validate_optim_conf(optim_conf: Dict[str, Any]) -> None:
|
| 377 |
+
valid_keys = {"optimizer", "lr_scheduler", "frequency", "monitor"}
|
| 378 |
+
extra_keys = optim_conf.keys() - valid_keys
|
| 379 |
+
if extra_keys:
|
| 380 |
+
rank_zero_warn(
|
| 381 |
+
f"Found unsupported keys in the optimizer configuration: {set(extra_keys)}", category=RuntimeWarning
|
| 382 |
+
)
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
class _MockOptimizer(Optimizer):
    """The `_MockOptimizer` will be used in place of an optimizer in the event that `None` is returned from
    `configure_optimizers`."""

    def __init__(self) -> None:
        super().__init__([torch.zeros(1)], {})

    def add_param_group(self, param_group: Dict[Any, Any]) -> None:
        pass  # Do Nothing

    def load_state_dict(self, state_dict: Dict[Any, Any]) -> None:
        pass  # Do Nothing

    def state_dict(self) -> Dict[str, Any]:
        return {}  # Return Empty

    def step(self, closure: Callable = None) -> None:
        if closure is not None:
            closure()

    def zero_grad(self, set_to_none: Optional[bool] = False) -> None:
        pass  # Do Nothing

    def __repr__(self) -> str:
        return "No Optimizer"
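# Illustrative sketch (not part of the original file): when `configure_optimizers` returns `None`,
# the trainer falls back to the `_MockOptimizer` above, whose `step` simply evaluates the closure so
# the training loop still runs forward/backward without updating parameters.
#
#     mock = _MockOptimizer()
#     result = []
#     mock.step(closure=lambda: result.append("closure ran"))
#     assert result == ["closure ran"]
#     assert mock.state_dict() == {}
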
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/saving.py
ADDED
|
@@ -0,0 +1,419 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ast
import csv
import inspect
import logging
import os
from argparse import Namespace
from copy import deepcopy
from enum import Enum
from typing import Any, Callable, Dict, IO, MutableMapping, Optional, Union
from warnings import warn

import torch
import yaml

from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE, AttributeDict
from pytorch_lightning.utilities.apply_func import apply_to_collection
from pytorch_lightning.utilities.cloud_io import get_filesystem
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.migration import pl_legacy_patch
from pytorch_lightning.utilities.parsing import parse_class_init_keys
from pytorch_lightning.utilities.rank_zero import rank_zero_warn

log = logging.getLogger(__name__)
PRIMITIVE_TYPES = (bool, int, float, str)
ALLOWED_CONFIG_TYPES = (AttributeDict, MutableMapping, Namespace)

if _OMEGACONF_AVAILABLE:
    from omegaconf import OmegaConf
    from omegaconf.dictconfig import DictConfig
    from omegaconf.errors import UnsupportedValueType, ValidationError

# the older keys shall be on the top
CHECKPOINT_PAST_HPARAMS_KEYS = ("hparams", "module_arguments")  # used in 0.7.6

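# Illustrative sketch (not part of the original file): the helpers defined further down in this file
# can round-trip hyperparameters through a tags CSV; the path and hparams values are hypothetical.
#
#     from argparse import Namespace
#     hparams = Namespace(batch_size=32, learning_rate=1e-3)
#     save_hparams_to_tags_csv("/tmp/hparams.csv", hparams)
#     restored = load_hparams_from_tags_csv("/tmp/hparams.csv")
#     assert restored == vars(hparams)
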
class ModelIO:
|
| 51 |
+
CHECKPOINT_HYPER_PARAMS_KEY = "hyper_parameters"
|
| 52 |
+
CHECKPOINT_HYPER_PARAMS_NAME = "hparams_name"
|
| 53 |
+
CHECKPOINT_HYPER_PARAMS_TYPE = "hparams_type"
|
| 54 |
+
|
| 55 |
+
@classmethod
|
| 56 |
+
def load_from_checkpoint(
|
| 57 |
+
cls,
|
| 58 |
+
checkpoint_path: Union[str, IO],
|
| 59 |
+
map_location: Optional[Union[Dict[str, str], str, torch.device, int, Callable]] = None,
|
| 60 |
+
hparams_file: Optional[str] = None,
|
| 61 |
+
strict: bool = True,
|
| 62 |
+
**kwargs,
|
| 63 |
+
):
|
| 64 |
+
r"""
|
| 65 |
+
Primary way of loading a model from a checkpoint. When Lightning saves a checkpoint
|
| 66 |
+
it stores the arguments passed to ``__init__`` in the checkpoint under ``"hyper_parameters"``.
|
| 67 |
+
|
| 68 |
+
Any arguments specified through \*\*kwargs will override args stored in ``"hyper_parameters"``.
|
| 69 |
+
|
| 70 |
+
Args:
|
| 71 |
+
checkpoint_path: Path to checkpoint. This can also be a URL, or file-like object
|
| 72 |
+
map_location:
|
| 73 |
+
If your checkpoint saved a GPU model and you now load on CPUs
|
| 74 |
+
or a different number of GPUs, use this to map to the new setup.
|
| 75 |
+
The behaviour is the same as in :func:`torch.load`.
|
| 76 |
+
hparams_file: Optional path to a .yaml file with hierarchical structure
|
| 77 |
+
as in this example::
|
| 78 |
+
|
| 79 |
+
drop_prob: 0.2
|
| 80 |
+
dataloader:
|
| 81 |
+
batch_size: 32
|
| 82 |
+
|
| 83 |
+
You most likely won't need this since Lightning will always save the hyperparameters
|
| 84 |
+
to the checkpoint.
|
| 85 |
+
However, if your checkpoint weights don't have the hyperparameters saved,
|
| 86 |
+
use this method to pass in a .yaml file with the hparams you'd like to use.
|
| 87 |
+
These will be converted into a :class:`~dict` and passed into your
|
| 88 |
+
:class:`LightningModule` for use.
|
| 89 |
+
|
| 90 |
+
If your model's ``hparams`` argument is :class:`~argparse.Namespace`
|
| 91 |
+
and .yaml file has hierarchical structure, you need to refactor your model to treat
|
| 92 |
+
``hparams`` as :class:`~dict`.
|
| 93 |
+
strict: Whether to strictly enforce that the keys in :attr:`checkpoint_path` match the keys
|
| 94 |
+
returned by this module's state dict.
|
| 95 |
+
kwargs: Any extra keyword args needed to init the model. Can also be used to override saved
|
| 96 |
+
hyperparameter values.
|
| 97 |
+
|
| 98 |
+
Return:
|
| 99 |
+
:class:`LightningModule` instance with loaded weights and hyperparameters (if available).
|
| 100 |
+
|
| 101 |
+
Note:
|
| 102 |
+
``load_from_checkpoint`` is a **class** method. You should use your :class:`LightningModule`
|
| 103 |
+
**class** to call it instead of the :class:`LightningModule` instance.
|
| 104 |
+
|
| 105 |
+
Example::
|
| 106 |
+
|
| 107 |
+
# load weights without mapping ...
|
| 108 |
+
model = MyLightningModule.load_from_checkpoint('path/to/checkpoint.ckpt')
|
| 109 |
+
|
| 110 |
+
# or load weights mapping all weights from GPU 1 to GPU 0 ...
|
| 111 |
+
map_location = {'cuda:1':'cuda:0'}
|
| 112 |
+
model = MyLightningModule.load_from_checkpoint(
|
| 113 |
+
'path/to/checkpoint.ckpt',
|
| 114 |
+
map_location=map_location
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
# or load weights and hyperparameters from separate files.
|
| 118 |
+
model = MyLightningModule.load_from_checkpoint(
|
| 119 |
+
'path/to/checkpoint.ckpt',
|
| 120 |
+
hparams_file='/path/to/hparams_file.yaml'
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
# override some of the params with new values
|
| 124 |
+
model = MyLightningModule.load_from_checkpoint(
|
| 125 |
+
PATH,
|
| 126 |
+
num_layers=128,
|
| 127 |
+
pretrained_ckpt_path=NEW_PATH,
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
# predict
|
| 131 |
+
pretrained_model.eval()
|
| 132 |
+
pretrained_model.freeze()
|
| 133 |
+
y_hat = pretrained_model(x)
|
| 134 |
+
"""
|
| 135 |
+
with pl_legacy_patch():
|
| 136 |
+
if map_location is not None:
|
| 137 |
+
checkpoint = pl_load(checkpoint_path, map_location=map_location)
|
| 138 |
+
else:
|
| 139 |
+
checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)
|
| 140 |
+
|
| 141 |
+
if hparams_file is not None:
|
| 142 |
+
extension = hparams_file.split(".")[-1]
|
| 143 |
+
if extension.lower() == "csv":
|
| 144 |
+
hparams = load_hparams_from_tags_csv(hparams_file)
|
| 145 |
+
elif extension.lower() in ("yml", "yaml"):
|
| 146 |
+
hparams = load_hparams_from_yaml(hparams_file)
|
| 147 |
+
else:
|
| 148 |
+
raise ValueError(".csv, .yml or .yaml is required for `hparams_file`")
|
| 149 |
+
|
| 150 |
+
hparams["on_gpu"] = False
|
| 151 |
+
|
| 152 |
+
# overwrite hparams by the given file
|
| 153 |
+
checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams
|
| 154 |
+
|
| 155 |
+
# for past checkpoint need to add the new key
|
| 156 |
+
if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint:
|
| 157 |
+
checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {}
|
| 158 |
+
# override the hparams with values that were passed in
|
| 159 |
+
checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].update(kwargs)
|
| 160 |
+
|
| 161 |
+
model = cls._load_model_state(checkpoint, strict=strict, **kwargs)
|
| 162 |
+
return model
|
| 163 |
+
|
| 164 |
+
@classmethod
|
| 165 |
+
def _load_model_state(cls, checkpoint: Dict[str, Any], strict: bool = True, **cls_kwargs_new):
|
| 166 |
+
cls_spec = inspect.getfullargspec(cls.__init__)
|
| 167 |
+
cls_init_args_name = inspect.signature(cls.__init__).parameters.keys()
|
| 168 |
+
|
| 169 |
+
self_var, args_var, kwargs_var = parse_class_init_keys(cls)
|
| 170 |
+
        drop_names = [n for n in (self_var, args_var, kwargs_var) if n]
        cls_init_args_name = list(filter(lambda n: n not in drop_names, cls_init_args_name))

        cls_kwargs_loaded = {}
        # pass in the values we saved automatically
        if cls.CHECKPOINT_HYPER_PARAMS_KEY in checkpoint:

            # 1. (backward compatibility) Try to restore model hparams from checkpoint using old/past keys
            for _old_hparam_key in CHECKPOINT_PAST_HPARAMS_KEYS:
                cls_kwargs_loaded.update(checkpoint.get(_old_hparam_key, {}))

            # 2. Try to restore model hparams from checkpoint using the new key
            _new_hparam_key = cls.CHECKPOINT_HYPER_PARAMS_KEY
            cls_kwargs_loaded.update(checkpoint.get(_new_hparam_key))

            # 3. Ensure that `cls_kwargs_old` has the right type, back compatibility between dict and Namespace
            cls_kwargs_loaded = _convert_loaded_hparams(
                cls_kwargs_loaded, checkpoint.get(cls.CHECKPOINT_HYPER_PARAMS_TYPE)
            )

            # 4. Update cls_kwargs_new with cls_kwargs_old, such that new has higher priority
            args_name = checkpoint.get(cls.CHECKPOINT_HYPER_PARAMS_NAME)
            if args_name and args_name in cls_init_args_name:
                cls_kwargs_loaded = {args_name: cls_kwargs_loaded}

        _cls_kwargs = {}
        _cls_kwargs.update(cls_kwargs_loaded)
        _cls_kwargs.update(cls_kwargs_new)

        if not cls_spec.varkw:
            # filter kwargs according to class init unless it allows any argument via kwargs
            _cls_kwargs = {k: v for k, v in _cls_kwargs.items() if k in cls_init_args_name}

        model = cls(**_cls_kwargs)

        # give model a chance to load something
        model.on_load_checkpoint(checkpoint)

        # load the state_dict on the model automatically
        keys = model.load_state_dict(checkpoint["state_dict"], strict=strict)

        if not strict:
            if keys.missing_keys:
                rank_zero_warn(
                    f"Found keys that are in the model state dict but not in the checkpoint: {keys.missing_keys}"
                )
            if keys.unexpected_keys:
                rank_zero_warn(
                    f"Found keys that are not in the model state dict but in the checkpoint: {keys.unexpected_keys}"
                )

        return model

    # -------------------------
    # OPTIONAL HOOKS
    # -------------------------
    def on_hpc_save(self, checkpoint: Dict[str, Any]) -> None:
        """Hook to do whatever you need right before Slurm manager saves the model.

        Args:
            checkpoint: A dictionary in which you can save variables to save in a checkpoint.
                Contents need to be pickleable.

        .. deprecated:: v1.6
            This method is deprecated in v1.6 and will be removed in v1.8.
            Please use ``LightningModule.on_save_checkpoint`` instead.
        """

    def on_hpc_load(self, checkpoint: Dict[str, Any]) -> None:
        """Hook to do whatever you need right before Slurm manager loads the model.

        Args:
            checkpoint: A dictionary with variables from the checkpoint.

        .. deprecated:: v1.6
            This method is deprecated in v1.6 and will be removed in v1.8.
            Please use ``LightningModule.on_load_checkpoint`` instead.
        """


def _convert_loaded_hparams(model_args: dict, hparams_type: Optional[Union[Callable, str]] = None) -> object:
    """Convert hparams according given type in callable or string (past) format."""
    # if not hparams type define
    if not hparams_type:
        return model_args
    # if past checkpoint loaded, convert str to callable
    if isinstance(hparams_type, str):
        hparams_type = AttributeDict
    # convert hparams
    return hparams_type(model_args)


def update_hparams(hparams: dict, updates: dict) -> None:
    """Overrides hparams with new values.

    >>> hparams = {'c': 4}
    >>> update_hparams(hparams, {'a': {'b': 2}, 'c': 1})
    >>> hparams['a']['b'], hparams['c']
    (2, 1)
    >>> update_hparams(hparams, {'a': {'b': 4}, 'c': 7})
    >>> hparams['a']['b'], hparams['c']
    (4, 7)

    Args:
        hparams: the original params and also target object
        updates: new params to be used as update
    """
    for k, v in updates.items():
        # if missing, add the key
        if k not in hparams:
            hparams[k] = v
            continue

        # recurse if dictionary
        if isinstance(v, dict):
            update_hparams(hparams[k], updates[k])
        else:
            # update the value
            hparams.update({k: v})


def load_hparams_from_tags_csv(tags_csv: str) -> Dict[str, Any]:
    """Load hparams from a file.

    >>> hparams = Namespace(batch_size=32, learning_rate=0.001, data_root='./any/path/here')
    >>> path_csv = os.path.join('.', 'testing-hparams.csv')
    >>> save_hparams_to_tags_csv(path_csv, hparams)
    >>> hparams_new = load_hparams_from_tags_csv(path_csv)
    >>> vars(hparams) == hparams_new
    True
    >>> os.remove(path_csv)
    """
    fs = get_filesystem(tags_csv)
    if not fs.exists(tags_csv):
        rank_zero_warn(f"Missing Tags: {tags_csv}.", category=RuntimeWarning)
        return {}

    with fs.open(tags_csv, "r", newline="") as fp:
        csv_reader = csv.reader(fp, delimiter=",")
        tags = {row[0]: convert(row[1]) for row in list(csv_reader)[1:]}

    return tags


def save_hparams_to_tags_csv(tags_csv: str, hparams: Union[dict, Namespace]) -> None:
    fs = get_filesystem(tags_csv)
    if not fs.isdir(os.path.dirname(tags_csv)):
        raise RuntimeError(f"Missing folder: {os.path.dirname(tags_csv)}.")

    if isinstance(hparams, Namespace):
        hparams = vars(hparams)

    with fs.open(tags_csv, "w", newline="") as fp:
        fieldnames = ["key", "value"]
        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        writer.writerow({"key": "key", "value": "value"})
        for k, v in hparams.items():
            writer.writerow({"key": k, "value": v})


def load_hparams_from_yaml(config_yaml: str, use_omegaconf: bool = True) -> Dict[str, Any]:
    """Load hparams from a file.

    Args:
        config_yaml: Path to config yaml file
        use_omegaconf: If omegaconf is available and ``use_omegaconf=True``,
            the hparams will be converted to ``DictConfig`` if possible.

    >>> hparams = Namespace(batch_size=32, learning_rate=0.001, data_root='./any/path/here')
    >>> path_yaml = './testing-hparams.yaml'
    >>> save_hparams_to_yaml(path_yaml, hparams)
    >>> hparams_new = load_hparams_from_yaml(path_yaml)
    >>> vars(hparams) == hparams_new
    True
    >>> os.remove(path_yaml)
    """
    fs = get_filesystem(config_yaml)
    if not fs.exists(config_yaml):
        rank_zero_warn(f"Missing Tags: {config_yaml}.", category=RuntimeWarning)
        return {}

    with fs.open(config_yaml, "r") as fp:
        hparams = yaml.full_load(fp)

    if _OMEGACONF_AVAILABLE:
        if use_omegaconf:
            try:
                return OmegaConf.create(hparams)
            except (UnsupportedValueType, ValidationError):
                pass
    return hparams


def save_hparams_to_yaml(config_yaml, hparams: Union[dict, Namespace], use_omegaconf: bool = True) -> None:
    """
    Args:
        config_yaml: path to new YAML file
        hparams: parameters to be saved
        use_omegaconf: If omegaconf is available and ``use_omegaconf=True``,
            the hparams will be converted to ``DictConfig`` if possible.

    """
    fs = get_filesystem(config_yaml)
    if not fs.isdir(os.path.dirname(config_yaml)):
        raise RuntimeError(f"Missing folder: {os.path.dirname(config_yaml)}.")

    # convert Namespace or AD to dict
    if isinstance(hparams, Namespace):
        hparams = vars(hparams)
    elif isinstance(hparams, AttributeDict):
        hparams = dict(hparams)

    # saving with OmegaConf objects
    if _OMEGACONF_AVAILABLE and use_omegaconf:
        # deepcopy: hparams from user shouldn't be resolved
        hparams = deepcopy(hparams)
        hparams = apply_to_collection(hparams, DictConfig, OmegaConf.to_container, resolve=True)
        with fs.open(config_yaml, "w", encoding="utf-8") as fp:
            try:
                OmegaConf.save(hparams, fp)
                return
            except (UnsupportedValueType, ValidationError):
                pass

    if not isinstance(hparams, dict):
        raise TypeError("hparams must be dictionary")

    hparams_allowed = {}
    # drop parameters which contain some strange datatypes as fsspec
    for k, v in hparams.items():
        try:
            v = v.name if isinstance(v, Enum) else v
            yaml.dump(v)
        except TypeError:
            warn(f"Skipping '{k}' parameter because it is not possible to safely dump to YAML.")
            hparams[k] = type(v).__name__
        else:
            hparams_allowed[k] = v

    # saving the standard way
    with fs.open(config_yaml, "w", newline="") as fp:
        yaml.dump(hparams_allowed, fp)


def convert(val: str) -> Union[int, float, bool, str]:
    try:
        return ast.literal_eval(val)
    except (ValueError, SyntaxError) as err:
        log.debug(err)
        return val
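An illustrative usage sketch of the hparams helpers defined above; the file name `hparams.yaml` is hypothetical and `use_omegaconf=False` is chosen so a plain dict is returned:

```python
from argparse import Namespace

from pytorch_lightning.core.saving import load_hparams_from_yaml, save_hparams_to_yaml, update_hparams

hparams = Namespace(batch_size=32, learning_rate=0.001)
save_hparams_to_yaml("./hparams.yaml", hparams)                        # dump to YAML
loaded = load_hparams_from_yaml("./hparams.yaml", use_omegaconf=False)  # plain dict back
assert loaded == vars(hparams)

# update_hparams merges nested updates in place, adding missing keys
update_hparams(loaded, {"learning_rate": 0.01, "optimizer": {"name": "adam"}})
```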
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/distributed/__init__.py
ADDED
@@ -0,0 +1,14 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pytorch_lightning.distributed.dist import LightningDistributed  # noqa: F401
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/distributed/dist.py
ADDED
@@ -0,0 +1,47 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any

import torch.distributed

from pytorch_lightning.utilities import rank_zero_deprecation
from pytorch_lightning.utilities.distributed import group as _group


class LightningDistributed:
    """
    .. deprecated:: v1.5
        This class is deprecated in v1.5 and will be removed in v1.7.
        The broadcast logic will be moved to the :class:`DDPStrategy` and :class`DDPSpawnStrategy` classes.

    """

    def __init__(self, rank=None, device=None):
        rank_zero_deprecation(
            "LightningDistributed is deprecated in v1.5 and will be removed in v1.7."
            " Broadcast logic is implemented directly in the :class:`Strategy` implementations."
        )
        self.rank = rank
        self.device = device

    def broadcast(self, obj: Any, group=_group.WORLD):
        # always wrap into a list so it can be broadcasted.
        obj = [obj]

        if self.rank != 0:
            obj = [None] * len(obj)

        torch.distributed.broadcast_object_list(obj, 0, group=group or _group.WORLD)

        return obj[0]
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (3.51 kB)
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/__pycache__/layer_sync.cpython-38.pyc
ADDED
Binary file (3.19 kB)
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/__init__.py
ADDED
@@ -0,0 +1,20 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pytorch_lightning.plugins.environments.bagua_environment import BaguaEnvironment  # noqa: F401
from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment  # noqa: F401
from pytorch_lightning.plugins.environments.kubeflow_environment import KubeflowEnvironment  # noqa: F401
from pytorch_lightning.plugins.environments.lightning_environment import LightningEnvironment  # noqa: F401
from pytorch_lightning.plugins.environments.lsf_environment import LSFEnvironment  # noqa: F401
from pytorch_lightning.plugins.environments.slurm_environment import SLURMEnvironment  # noqa: F401
from pytorch_lightning.plugins.environments.torchelastic_environment import TorchElasticEnvironment  # noqa: F401
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (897 Bytes)
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/__pycache__/bagua_environment.cpython-38.pyc
ADDED
Binary file (2.78 kB)
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/bagua_environment.py
ADDED
@@ -0,0 +1,62 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment

log = logging.getLogger(__name__)


class BaguaEnvironment(ClusterEnvironment):
    """Environment for distributed training with `Bagua <https://tutorials.baguasys.com/>`_"""

    @property
    def creates_processes_externally(self) -> bool:
        return True

    @property
    def main_address(self) -> str:
        return os.environ.get("MASTER_ADDR", "127.0.0.1")

    @property
    def main_port(self) -> int:
        return int(os.environ.get("MASTER_PORT", -1))

    @property
    def service_port(self) -> int:
        return int(os.environ.get("BAGUA_SERVICE_PORT", -1))

    @staticmethod
    def detect() -> bool:
        return "BAGUA_SERVICE_PORT" in os.environ

    def world_size(self) -> int:
        return int(os.environ["WORLD_SIZE"])

    def set_world_size(self, size: int) -> None:
        log.debug("`BaguaEnvironment.set_world_size` was called, but setting world size is not allowed. Ignored.")

    def global_rank(self) -> int:
        return int(os.environ["RANK"])

    def set_global_rank(self, rank: int) -> None:
        log.debug("`BaguaEnvironment.set_global_rank` was called, but setting global rank is not allowed. Ignored.")

    def local_rank(self) -> int:
        return int(os.environ.get("LOCAL_RANK", 0))

    def node_rank(self) -> int:
        return int(os.environ.get("NODE_RANK", 0))
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/cluster_environment.py
ADDED
@@ -0,0 +1,87 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC, abstractmethod
from typing import Any, Type

from pytorch_lightning.utilities import rank_zero_deprecation


class ClusterEnvironment(ABC):
    """Specification of a cluster environment."""

    def __new__(cls, *args: Any, **kwargs: Any) -> "ClusterEnvironment":
        # TODO: remove in 1.7
        _check_for_deprecated_methods(cls)
        return super().__new__(cls)

    @property
    @abstractmethod
    def creates_processes_externally(self) -> bool:
        """Whether the environment creates the subprocesses or not."""

    @property
    @abstractmethod
    def main_address(self) -> str:
        """The main address through which all processes connect and communicate."""

    @property
    @abstractmethod
    def main_port(self) -> int:
        """An open and configured port in the main node through which all processes communicate."""

    @staticmethod
    @abstractmethod
    def detect() -> bool:
        """Detects the environment settings corresponding to this cluster and returns ``True`` if they match."""

    @abstractmethod
    def world_size(self) -> int:
        """The number of processes across all devices and nodes."""

    @abstractmethod
    def set_world_size(self, size: int) -> None:
        pass

    @abstractmethod
    def global_rank(self) -> int:
        """The rank (index) of the currently running process across all nodes and devices."""

    @abstractmethod
    def set_global_rank(self, rank: int) -> None:
        pass

    @abstractmethod
    def local_rank(self) -> int:
        """The rank (index) of the currently running process inside of the current node."""

    @abstractmethod
    def node_rank(self) -> int:
        """The rank (index) of the node on which the current process runs."""

    def teardown(self) -> None:
        """Clean up any state set after execution finishes."""
        pass


def _check_for_deprecated_methods(cls: Type[ClusterEnvironment]) -> None:
    if hasattr(cls, "master_address") and callable(cls.master_address):
        rank_zero_deprecation(
            f"`{cls.__name__}.master_address` has been deprecated in v1.6 and will be removed in v1.7."
            " Implement the property `main_address` instead (do not forget to add the `@property` decorator)."
        )
    if hasattr(cls, "master_port") and callable(cls.master_port):
        rank_zero_deprecation(
            f"`{cls.__name__}.master_port` has been deprecated in v1.6 and will be removed in v1.7."
            " Implement the property `main_port` instead (do not forget to add the `@property` decorator)."
        )
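A minimal sketch of a custom environment built on the abstract interface above; the class and the `MY_LAUNCHER_RANK` variable are hypothetical, standing in for whatever an external launcher exports:

```python
import os

from pytorch_lightning.plugins.environments import ClusterEnvironment


class MyClusterEnvironment(ClusterEnvironment):
    @property
    def creates_processes_externally(self) -> bool:
        return True  # the launcher, not Lightning, starts every process

    @property
    def main_address(self) -> str:
        return os.environ["MASTER_ADDR"]

    @property
    def main_port(self) -> int:
        return int(os.environ["MASTER_PORT"])

    @staticmethod
    def detect() -> bool:
        return "MY_LAUNCHER_RANK" in os.environ  # hypothetical marker variable

    def world_size(self) -> int:
        return int(os.environ["WORLD_SIZE"])

    def set_world_size(self, size: int) -> None:
        pass  # fixed by the launcher

    def global_rank(self) -> int:
        return int(os.environ["MY_LAUNCHER_RANK"])

    def set_global_rank(self, rank: int) -> None:
        pass  # fixed by the launcher

    def local_rank(self) -> int:
        return int(os.environ.get("LOCAL_RANK", 0))

    def node_rank(self) -> int:
        return int(os.environ.get("NODE_RANK", 0))
```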
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/kubeflow_environment.py
ADDED
@@ -0,0 +1,78 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
from pytorch_lightning.utilities import rank_zero_deprecation

log = logging.getLogger(__name__)


class KubeflowEnvironment(ClusterEnvironment):
    """Environment for distributed training using the `PyTorchJob`_ operator from `Kubeflow`_

    .. _PyTorchJob: https://www.kubeflow.org/docs/components/training/pytorch/
    .. _Kubeflow: https://www.kubeflow.org
    """

    def __init__(self) -> None:
        super().__init__()
        # TODO: remove in 1.7
        if hasattr(self, "is_using_kubeflow") and callable(self.is_using_kubeflow):
            rank_zero_deprecation(
                f"`{self.__class__.__name__}.is_using_kubeflow` has been deprecated in v1.6 and will be removed in"
                f" v1.7. Implement the static method `detect()` instead (do not forget to add the `@staticmethod`"
                f" decorator)."
            )

    @property
    def creates_processes_externally(self) -> bool:
        return True

    @property
    def main_address(self) -> str:
        return os.environ["MASTER_ADDR"]

    @property
    def main_port(self) -> int:
        return int(os.environ["MASTER_PORT"])

    @staticmethod
    def detect() -> bool:
        """Returns ``True`` if the current process was launched using Kubeflow PyTorchJob."""
        required_env_vars = {"KUBERNETES_PORT", "MASTER_ADDR", "MASTER_PORT", "WORLD_SIZE", "RANK"}
        # torchelastic sets these. Make sure we're not in torchelastic
        excluded_env_vars = {"GROUP_RANK", "LOCAL_RANK", "LOCAL_WORLD_SIZE"}
        env_vars = os.environ.keys()
        return required_env_vars.issubset(env_vars) and excluded_env_vars.isdisjoint(env_vars)

    def world_size(self) -> int:
        return int(os.environ["WORLD_SIZE"])

    def set_world_size(self, size: int) -> None:
        log.debug("KubeflowEnvironment.set_world_size was called, but setting world size is not allowed. Ignored.")

    def global_rank(self) -> int:
        return int(os.environ["RANK"])

    def set_global_rank(self, rank: int) -> None:
        log.debug("KubeflowEnvironment.set_global_rank was called, but setting global rank is not allowed. Ignored.")

    def local_rank(self) -> int:
        return 0

    def node_rank(self) -> int:
        return self.global_rank()
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/lightning_environment.py
ADDED
@@ -0,0 +1,101 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import socket

from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
from pytorch_lightning.utilities.rank_zero import rank_zero_only


class LightningEnvironment(ClusterEnvironment):
    """The default environment used by Lightning for a single node or free cluster (not managed).

    There are two modes the Lightning environment can operate with:

    1. The user only launches the main process by :code:`python train.py ...` with no additional environment variables
       set. Lightning will spawn new worker processes for distributed training in the current node.
    2. The user launches all processes manually or with utilities like :code:`torch.distributed.launch`.
       The appropriate environment variables need to be set, and at minimum :code:`LOCAL_RANK`.

    If the main address and port are not provided, the default environment will choose them
    automatically. It is recommended to use this default environment for single-node distributed
    training as it provides a convenient way to launch the training script.
    """

    def __init__(self) -> None:
        super().__init__()
        self._main_port: int = -1
        self._global_rank: int = 0
        self._world_size: int = 1

    @property
    def creates_processes_externally(self) -> bool:
        """Returns whether the cluster creates the processes or not.

        If at least :code:`LOCAL_RANK` is available as environment variable, Lightning assumes the user acts as the
        process launcher/job scheduler and Lightning will not launch new processes.
        """
        return "LOCAL_RANK" in os.environ

    @property
    def main_address(self) -> str:
        return os.environ.get("MASTER_ADDR", "127.0.0.1")

    @property
    def main_port(self) -> int:
        if self._main_port == -1:
            self._main_port = int(os.environ.get("MASTER_PORT", find_free_network_port()))
        return self._main_port

    @staticmethod
    def detect() -> bool:
        return True

    def world_size(self) -> int:
        return self._world_size

    def set_world_size(self, size: int) -> None:
        self._world_size = size

    def global_rank(self) -> int:
        return self._global_rank

    def set_global_rank(self, rank: int) -> None:
        self._global_rank = rank
        rank_zero_only.rank = rank

    def local_rank(self) -> int:
        return int(os.environ.get("LOCAL_RANK", 0))

    def node_rank(self) -> int:
        group_rank = os.environ.get("GROUP_RANK", 0)
        return int(os.environ.get("NODE_RANK", group_rank))

    def teardown(self) -> None:
        if "WORLD_SIZE" in os.environ:
            del os.environ["WORLD_SIZE"]


def find_free_network_port() -> int:
    """Finds a free port on localhost.

    It is useful in single-node training when we don't want to connect to a real main node but have to set the
    `MASTER_PORT` environment variable.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(("", 0))
    port = s.getsockname()[1]
    s.close()
    return port
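A small illustrative sketch of how the pieces above behave on a single machine: `find_free_network_port` binds to port 0 so the OS picks an unused port, and `LightningEnvironment` falls back to localhost and a free port when nothing is exported (printed values will vary):

```python
from pytorch_lightning.plugins.environments.lightning_environment import LightningEnvironment, find_free_network_port

port = find_free_network_port()    # OS-assigned free port on localhost
env = LightningEnvironment()
print(env.main_address)            # "127.0.0.1" unless MASTER_ADDR is set
print(env.main_port)               # MASTER_PORT if set, otherwise a freshly chosen free port
```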
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/lsf_environment.py
ADDED
@@ -0,0 +1,190 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import socket
from typing import Dict, List

from pytorch_lightning import _logger as log
from pytorch_lightning.plugins.environments import ClusterEnvironment
from pytorch_lightning.utilities import rank_zero_deprecation
from pytorch_lightning.utilities.cloud_io import get_filesystem


class LSFEnvironment(ClusterEnvironment):
    """An environment for running on clusters managed by the LSF resource manager.

    It is expected that any execution using this ClusterEnvironment was executed
    using the Job Step Manager i.e. ``jsrun``.

    This plugin expects the following environment variables:

    ``LSB_JOBID``
        The LSF assigned job ID

    ``LSB_DJOB_RANKFILE``
        The OpenMPI compatible rank file for the LSF job

    ``JSM_NAMESPACE_LOCAL_RANK``
        The node local rank for the task. This environment variable is set by ``jsrun``

    ``JSM_NAMESPACE_SIZE``
        The world size for the task. This environment variable is set by ``jsrun``

    ``JSM_NAMESPACE_RANK``
        The global rank for the task. This environment variable is set by ``jsrun``
    """

    def __init__(self) -> None:
        super().__init__()
        # TODO: remove in 1.7
        if hasattr(self, "is_using_lsf") and callable(self.is_using_lsf):
            rank_zero_deprecation(
                f"`{self.__class__.__name__}.is_using_lsf` has been deprecated in v1.6 and will be removed in v1.7."
                " Implement the static method `detect()` instead (do not forget to add the `@staticmethod` decorator)."
            )
        self._main_address = self._get_main_address()
        self._main_port = self._get_main_port()
        self._node_rank = self._get_node_rank()
        self._set_init_progress_group_env_vars()

    def _set_init_progress_group_env_vars(self) -> None:
        # set environment variables needed for initializing torch distributed process group
        os.environ["MASTER_ADDR"] = str(self._main_address)
        log.debug(f"MASTER_ADDR: {os.environ['MASTER_ADDR']}")
        os.environ["MASTER_PORT"] = str(self._main_port)
        log.debug(f"MASTER_PORT: {os.environ['MASTER_PORT']}")

    @property
    def creates_processes_externally(self) -> bool:
        """LSF creates subprocesses, i.e., PyTorch Lightning does not need to spawn them."""
        return True

    @property
    def main_address(self) -> str:
        """The main address is read from an OpenMPI host rank file in the environment variable
        ``LSB_DJOB_RANKFILE``."""
        return self._main_address

    @property
    def main_port(self) -> int:
        """The main port is calculated from the LSF job ID."""
        return self._main_port

    @staticmethod
    def detect() -> bool:
        """Returns ``True`` if the current process was launched using the ``jsrun`` command."""
        required_env_vars = {"LSB_JOBID", "LSB_DJOB_RANKFILE", "JSM_NAMESPACE_LOCAL_RANK", "JSM_NAMESPACE_SIZE"}
        return required_env_vars.issubset(os.environ.keys())

    def world_size(self) -> int:
        """The world size is read from the environment variable ``JSM_NAMESPACE_SIZE``."""
        world_size = os.environ.get("JSM_NAMESPACE_SIZE")
        if world_size is None:
            raise ValueError(
                "Cannot determine world size. Environment variable `JSM_NAMESPACE_SIZE` not found."
                "Make sure you run your executable with `jsrun`."
            )
        return int(world_size)

    def set_world_size(self, size: int) -> None:
        log.debug("LSFEnvironment.set_world_size was called, but setting world size is not allowed. Ignored.")

    def global_rank(self) -> int:
        """The world size is read from the environment variable ``JSM_NAMESPACE_RANK``."""
        global_rank = os.environ.get("JSM_NAMESPACE_RANK")
        if global_rank is None:
            raise ValueError(
                "Cannot determine global rank. Environment variable `JSM_NAMESPACE_RANK` not found."
                "Make sure you run your executable with `jsrun`."
            )
        return int(global_rank)

    def set_global_rank(self, rank: int) -> None:
        log.debug("LSFEnvironment.set_global_rank was called, but setting global rank is not allowed. Ignored.")

    def local_rank(self) -> int:
        """The local rank is read from the environment variable `JSM_NAMESPACE_LOCAL_RANK`."""
        local_rank = os.environ.get("JSM_NAMESPACE_LOCAL_RANK")
        if local_rank is None:
            raise ValueError(
                "Cannot determine local rank. Environment variable `JSM_NAMESPACE_LOCAL_RANK` not found."
                "Make sure you run your executable with `jsrun`."
            )
        return int(local_rank)

    def node_rank(self) -> int:
        """The node rank is determined by the position of the current hostname in the OpenMPI host rank file stored
        in ``LSB_DJOB_RANKFILE``."""
        return self._node_rank

    def _get_node_rank(self) -> int:
        """A helper method for getting the node rank.

        The node rank is determined by the position of the current node in the list of hosts used in the job. This is
        calculated by reading all hosts from ``LSB_DJOB_RANKFILE`` and finding this node's hostname in the list.
        """
        hosts = self._read_hosts()
        count: Dict[str, int] = {}
        for host in hosts:
            if host not in count:
                count[host] = len(count)
        return count[socket.gethostname()]

    @staticmethod
    def _read_hosts() -> List[str]:
        """Read compute hosts that are a part of the compute job.

        LSF uses the Job Step Manager (JSM) to manage job steps. Job steps are executed by the JSM from "launch" nodes.
        Each job is assigned a launch node. This launch node will be the first node in the list contained in
        ``LSB_DJOB_RANKFILE``.
        """
        var = "LSB_DJOB_RANKFILE"
        rankfile = os.environ.get(var)
        if rankfile is None:
            raise ValueError("Did not find the environment variable `LSB_DJOB_RANKFILE`")
        if not rankfile:
            raise ValueError("The environment variable `LSB_DJOB_RANKFILE` is empty")

        fs = get_filesystem(rankfile)
        with fs.open(rankfile, "r") as f:
            ret = [line.strip() for line in f]
        # remove the launch node (i.e. the first node in LSB_DJOB_RANKFILE) from the list
        return ret[1:]

    def _get_main_address(self) -> str:
        """A helper for getting the main address.

        The main address is assigned to the first node in the list of nodes used for the job.
        """
        hosts = self._read_hosts()
        return hosts[0]

    @staticmethod
    def _get_main_port() -> int:
        """A helper function for accessing the main port.

        Uses the LSF job ID so all ranks can compute the main port.
        """
        # check for user-specified main port
        if "MASTER_PORT" in os.environ:
            log.debug(f"Using externally specified main port: {os.environ['MASTER_PORT']}")
            return int(os.environ["MASTER_PORT"])
        if "LSB_JOBID" in os.environ:
            port = int(os.environ["LSB_JOBID"])
            # all ports should be in the 10k+ range
            port = port % 1000 + 10000
            log.debug(f"calculated LSF main port: {port}")
            return port
        raise ValueError("Could not find job id in environment variable LSB_JOBID")
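The port arithmetic in `_get_main_port` above maps any LSF job ID into the 10000-10999 range, so every rank derives the same port without any communication. A quick check with an illustrative job ID:

```python
job_id = 2971078              # example LSB_JOBID value (illustrative)
port = job_id % 1000 + 10000  # same formula as LSFEnvironment._get_main_port
assert port == 10078
```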
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/slurm_environment.py
ADDED
@@ -0,0 +1,134 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import re
from typing import Optional

from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment

log = logging.getLogger(__name__)


class SLURMEnvironment(ClusterEnvironment):
    """Cluster environment for training on a cluster managed by SLURM.

    Args:
        auto_requeue: Whether automatic job resubmission is enabled or not. How and under which conditions a job gets
            rescheduled gets determined by the owner of this plugin.
    """

    def __init__(self, auto_requeue: bool = True) -> None:
        super().__init__()
        self.auto_requeue = auto_requeue

    @property
    def creates_processes_externally(self) -> bool:
        return True

    @property
    def main_address(self) -> str:
        # figure out the root node addr
        slurm_nodelist = os.environ.get("SLURM_NODELIST")
        if slurm_nodelist:
            root_node = slurm_nodelist.split(" ")[0].split(",")[0]
        else:
            root_node = "127.0.0.1"

        root_node = self.resolve_root_node_address(root_node)
        os.environ["MASTER_ADDR"] = root_node
        log.debug(f"MASTER_ADDR: {os.environ['MASTER_ADDR']}")
        return root_node

    @property
    def main_port(self) -> int:
        # -----------------------
        # SLURM JOB = PORT number
        # -----------------------
        # this way every process knows what port to use
        job_id = os.environ.get("SLURM_JOB_ID")
        if job_id is not None:
            # use the last 4 numbers in the job id as the id
            default_port = job_id[-4:]
            # all ports should be in the 10k+ range
            default_port = int(default_port) + 15000
        else:
            default_port = 12910

        # -----------------------
        # PORT NUMBER = MASTER_PORT
        # -----------------------
        # in case the user passed it in
        if "MASTER_PORT" in os.environ:
            default_port = int(os.environ["MASTER_PORT"])
        else:
            os.environ["MASTER_PORT"] = str(default_port)

        log.debug(f"MASTER_PORT: {os.environ['MASTER_PORT']}")
        return default_port

    @staticmethod
    def detect() -> bool:
        """Returns ``True`` if the current process was launched on a SLURM cluster."""
        return "SLURM_NTASKS" in os.environ

    @staticmethod
    def job_name() -> Optional[str]:
        return os.environ.get("SLURM_JOB_NAME")

    @staticmethod
    def job_id() -> Optional[int]:
        # in interactive mode, don't make logs use the same job id
        in_slurm_interactive_mode = SLURMEnvironment.job_name() == "bash"
        if in_slurm_interactive_mode:
            return None

        job_id = os.environ.get("SLURM_JOB_ID")
        if job_id is None:
            return None
        try:
            return int(job_id)
        except ValueError:
            return None

    def world_size(self) -> int:
        return int(os.environ["SLURM_NTASKS"])

    def set_world_size(self, size: int) -> None:
        log.debug("SLURMEnvironment.set_world_size was called, but setting world size is not allowed. Ignored.")

    def global_rank(self) -> int:
        return int(os.environ["SLURM_PROCID"])

    def set_global_rank(self, rank: int) -> None:
        log.debug("SLURMEnvironment.set_global_rank was called, but setting global rank is not allowed. Ignored.")

    def local_rank(self) -> int:
        return int(os.environ["SLURM_LOCALID"])

    def node_rank(self) -> int:
        return int(os.environ["SLURM_NODEID"])

    def resolve_root_node_address(self, root_node: str) -> str:
        if "[" in root_node:
            name, numbers = root_node.split("[", maxsplit=1)
            number = numbers.split(",", maxsplit=1)[0]
            if "-" in number:
                number = number.split("-")[0]

            number = re.sub("[^0-9]", "", number)
            root_node = name + number

        return root_node
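`resolve_root_node_address` above collapses SLURM's compact nodelist notation to the first concrete hostname; a short illustration with made-up hostnames:

```python
from pytorch_lightning.plugins.environments import SLURMEnvironment

env = SLURMEnvironment()
# a bracketed range resolves to the first host in the range
assert env.resolve_root_node_address("host[5-9]") == "host5"
# plain hostnames pass through unchanged
assert env.resolve_root_node_address("node017") == "node017"
```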
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/environments/torchelastic_environment.py
ADDED
@@ -0,0 +1,88 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import torch.distributed

from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_9_1
from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_warn

log = logging.getLogger(__name__)


class TorchElasticEnvironment(ClusterEnvironment):
    """Environment for fault-tolerant and elastic training with `torchelastic <https://pytorch.org/elastic/>`_"""

    def __init__(self) -> None:
        super().__init__()
        # TODO: remove in 1.7
        if hasattr(self, "is_using_torchelastic") and callable(self.is_using_torchelastic):
            rank_zero_deprecation(
                f"`{self.__class__.__name__}.is_using_torchelastic` has been deprecated in v1.6 and will be removed in"
                " v1.7. Implement the static method `detect()` instead (do not forget to add the `@staticmethod`"
                " decorator)."
            )

    @property
    def creates_processes_externally(self) -> bool:
        return True

    @property
    def main_address(self) -> str:
        if "MASTER_ADDR" not in os.environ:
            rank_zero_warn("MASTER_ADDR environment variable is not defined. Set as localhost")
            os.environ["MASTER_ADDR"] = "127.0.0.1"
        log.debug(f"MASTER_ADDR: {os.environ['MASTER_ADDR']}")
        return os.environ["MASTER_ADDR"]

    @property
    def main_port(self) -> int:
        if "MASTER_PORT" not in os.environ:
            rank_zero_warn("MASTER_PORT environment variable is not defined. Set as 12910")
            os.environ["MASTER_PORT"] = "12910"
        log.debug(f"MASTER_PORT: {os.environ['MASTER_PORT']}")

        return int(os.environ["MASTER_PORT"])

    @staticmethod
    def detect() -> bool:
        """Returns ``True`` if the current process was launched using the torchelastic command."""
        if _TORCH_GREATER_EQUAL_1_9_1:
            # if not available (for example on MacOS), `is_torchelastic_launched` is not defined
            return torch.distributed.is_available() and torch.distributed.is_torchelastic_launched()
        required_env_vars = {"RANK", "GROUP_RANK", "LOCAL_RANK", "LOCAL_WORLD_SIZE"}
        return required_env_vars.issubset(os.environ.keys())

    def world_size(self) -> int:
        return int(os.environ["WORLD_SIZE"])

    def set_world_size(self, size: int) -> None:
        log.debug("TorchElasticEnvironment.set_world_size was called, but setting world size is not allowed. Ignored.")

    def global_rank(self) -> int:
        return int(os.environ["RANK"])

    def set_global_rank(self, rank: int) -> None:
        log.debug(
            "TorchElasticEnvironment.set_global_rank was called, but setting global rank is not allowed. Ignored."
        )

    def local_rank(self) -> int:
        return int(os.environ["LOCAL_RANK"])

    def node_rank(self) -> int:
        return int(os.environ.get("GROUP_RANK", 0))
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/__init__.py
ADDED
@@ -0,0 +1,17 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO  # noqa: F401
from pytorch_lightning.plugins.io.hpu_plugin import HPUCheckpointIO  # noqa: F401
from pytorch_lightning.plugins.io.torch_plugin import TorchCheckpointIO  # noqa: F401
from pytorch_lightning.plugins.io.xla_plugin import XLACheckpointIO  # noqa: F401
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/__pycache__/xla_plugin.cpython-38.pyc
ADDED
Binary file (2.38 kB)
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/checkpoint_plugin.py
ADDED
@@ -0,0 +1,62 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional

from pytorch_lightning.utilities.types import _PATH


class CheckpointIO(ABC):
    """Interface to save/load checkpoints as they are saved through the ``Strategy``.

    Typically most plugins either use the Torch based IO Plugin; ``TorchCheckpointIO`` but may
    require particular handling depending on the plugin.

    In addition, you can pass a custom ``CheckpointIO`` by extending this class and passing it
    to the Trainer, i.e ``Trainer(plugins=[MyCustomCheckpointIO()])``.

    .. note::

        For some plugins, it is not possible to use a custom checkpoint plugin as checkpointing logic is not
        modifiable.
    """

    @abstractmethod
    def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        """Save model/training states as a checkpoint file through state-dump and file-write.

        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: Optional parameters when saving the model/training states.
        """

    @abstractmethod
    def load_checkpoint(self, path: _PATH, storage_options: Optional[Any] = None) -> Dict[str, Any]:
        """Load checkpoint from a path when resuming or loading ckpt for test/validate/predict stages.

        Args:
            path: Path to checkpoint
            storage_options: Optional parameters when loading the model/training states.

        Returns: The loaded checkpoint.
        """

    @abstractmethod
    def remove_checkpoint(self, path: _PATH) -> None:
        """Remove checkpoint file from the filesystem.

        Args:
            path: Path to checkpoint
        """
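A minimal sketch (the class name is hypothetical) of a custom plugin implementing the three abstract methods above with plain `torch.save`/`torch.load`; as the docstring notes, it could be passed to the Trainer via `Trainer(plugins=[SimpleTorchCheckpointIO()])`:

```python
import os
from typing import Any, Dict, Optional

import torch

from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO


class SimpleTorchCheckpointIO(CheckpointIO):
    def save_checkpoint(self, checkpoint: Dict[str, Any], path, storage_options: Optional[Any] = None) -> None:
        # make sure the target directory exists, then dump the state dict
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
        torch.save(checkpoint, path)

    def load_checkpoint(self, path, storage_options: Optional[Any] = None) -> Dict[str, Any]:
        # load onto CPU; the Strategy moves tensors to the right device afterwards
        return torch.load(path, map_location="cpu")

    def remove_checkpoint(self, path) -> None:
        if os.path.exists(path):
            os.remove(path)
```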
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/hpu_plugin.py
ADDED
@@ -0,0 +1,52 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from typing import Any, Dict, Optional

import torch

from pytorch_lightning.plugins.io.torch_plugin import TorchCheckpointIO
from pytorch_lightning.utilities.apply_func import move_data_to_device
from pytorch_lightning.utilities.cloud_io import atomic_save, get_filesystem
from pytorch_lightning.utilities.types import _PATH


class HPUCheckpointIO(TorchCheckpointIO):
    """CheckpointIO to save checkpoints for HPU training strategies."""

    def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        """Save model/training states as a checkpoint file through state-dump and file-write.

        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: not used in ``HPUCheckpointIO.save_checkpoint``

        Raises:
            TypeError:
                If ``storage_options`` arg is passed in
        """
        if storage_options is not None:
            raise TypeError(
                "`Trainer.save_checkpoint(..., storage_options=...)` with `storage_options` arg"
                f" is not supported for `{self.__class__.__name__}`. Please implement your custom `CheckpointIO`"
                " to define how you'd like to use `storage_options`."
            )
        fs = get_filesystem(path)
        fs.makedirs(os.path.dirname(path), exist_ok=True)

        checkpoint = move_data_to_device(checkpoint, torch.device("cpu"))
        # write the checkpoint dictionary to the provided path
        atomic_save(checkpoint, path)
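The only behavior ``HPUCheckpointIO`` adds on top of ``TorchCheckpointIO`` is moving the checkpoint to CPU before the file write. A standalone sketch of what ``move_data_to_device`` does to a nested dict; the toy tensors and keys are invented, and on an actual HPU run the source tensors would live on an HPU device rather than already being on CPU.

# Illustrative only: move_data_to_device recursively maps tensors in a collection to a device.
import torch
from pytorch_lightning.utilities.apply_func import move_data_to_device

state = {"state_dict": {"layer.bias": torch.zeros(4)}, "epoch": 3}  # toy checkpoint-like dict
cpu_state = move_data_to_device(state, torch.device("cpu"))  # all tensors end up on CPU
print(cpu_state["state_dict"]["layer.bias"].device)  # cpu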
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/torch_plugin.py
ADDED
@@ -0,0 +1,96 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
from typing import Any, Callable, Dict, Optional

import pytorch_lightning as pl
from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
from pytorch_lightning.utilities.cloud_io import atomic_save, get_filesystem
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.rank_zero import rank_zero_warn
from pytorch_lightning.utilities.types import _PATH

log = logging.getLogger(__name__)


class TorchCheckpointIO(CheckpointIO):
    """CheckpointIO that utilizes :func:`torch.save` and :func:`torch.load` to save and load checkpoints
    respectively, common for most use cases."""

    def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        """Save model/training states as a checkpoint file through state-dump and file-write.

        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: not used in ``TorchCheckpointIO.save_checkpoint``

        Raises:
            TypeError:
                If ``storage_options`` arg is passed in
        """
        if storage_options is not None:
            raise TypeError(
                "`Trainer.save_checkpoint(..., storage_options=...)` with `storage_options` arg"
                f" is not supported for `{self.__class__.__name__}`. Please implement your custom `CheckpointIO`"
                " to define how you'd like to use `storage_options`."
            )
        fs = get_filesystem(path)
        fs.makedirs(os.path.dirname(path), exist_ok=True)
        try:
            # write the checkpoint dictionary on the file
            atomic_save(checkpoint, path)
        except AttributeError as err:
            # todo (sean): is this try catch necessary still?
            # https://github.com/PyTorchLightning/pytorch-lightning/pull/431
            key = pl.LightningModule.CHECKPOINT_HYPER_PARAMS_KEY
            checkpoint.pop(key, None)
            rank_zero_warn(f"Warning, `{key}` dropped from checkpoint. An attribute is not picklable: {err}")
            atomic_save(checkpoint, path)

    def load_checkpoint(
        self, path: _PATH, map_location: Optional[Callable] = lambda storage, loc: storage
    ) -> Dict[str, Any]:
        """Loads checkpoint using :func:`torch.load`, with additional handling for ``fsspec`` remote loading of
        files.

        Args:
            path: Path to checkpoint
            map_location: a function, :class:`torch.device`, string or a dict specifying how to remap storage
                locations.

        Returns: The loaded checkpoint.

        Raises:
            FileNotFoundError: If ``path`` is not found by the ``fsspec`` filesystem
        """

        # Try to read the checkpoint at `path`. If not exist, do not restore checkpoint.
        fs = get_filesystem(path)
        if not fs.exists(path):
            raise FileNotFoundError(f"Checkpoint at {path} not found. Aborting training.")

        return pl_load(path, map_location=map_location)

    def remove_checkpoint(self, path: _PATH) -> None:
        """Remove checkpoint file from the filesystem.

        Args:
            path: Path to checkpoint
        """
        fs = get_filesystem(path)
        if fs.exists(path):
            fs.rm(path, recursive=True)
            log.debug(f"Removed checkpoint: {path}")
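``TorchCheckpointIO`` can also be exercised directly, outside a ``Trainer``. A minimal round-trip sketch, assuming a writable local directory; the path and the toy checkpoint dict are invented for the example.

import torch
from pytorch_lightning.plugins.io.torch_plugin import TorchCheckpointIO

io = TorchCheckpointIO()
ckpt = {"state_dict": {"w": torch.ones(2, 2)}, "epoch": 1}  # toy checkpoint dict

io.save_checkpoint(ckpt, "demo/toy.ckpt")       # atomic torch.save under the hood
loaded = io.load_checkpoint("demo/toy.ckpt")    # torch.load via the fsspec-aware loader
assert loaded["epoch"] == 1
io.remove_checkpoint("demo/toy.ckpt")           # deletes the file if it exists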
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/io/xla_plugin.py
ADDED
@@ -0,0 +1,57 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Any, Dict, Optional

from pytorch_lightning.plugins.io.torch_plugin import TorchCheckpointIO
from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE, _TPU_AVAILABLE
from pytorch_lightning.utilities.apply_func import apply_to_collection
from pytorch_lightning.utilities.cloud_io import get_filesystem
from pytorch_lightning.utilities.types import _PATH

if _TPU_AVAILABLE:
    import torch_xla.core.xla_model as xm

if _OMEGACONF_AVAILABLE:
    from omegaconf import DictConfig, ListConfig, OmegaConf


class XLACheckpointIO(TorchCheckpointIO):
    """CheckpointIO that utilizes :func:`xm.save` to save checkpoints for TPU training strategies."""

    def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        """Save model/training states as a checkpoint file through state-dump and file-write.

        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: not used in ``XLACheckpointIO.save_checkpoint``

        Raises:
            TypeError:
                If ``storage_options`` arg is passed in
        """
        if storage_options is not None:
            raise TypeError(
                "`Trainer.save_checkpoint(..., storage_options=...)` with `storage_options` arg"
                f" is not supported for `{self.__class__.__name__}`. Please implement your custom `CheckpointIO`"
                " to define how you'd like to use `storage_options`."
            )
        fs = get_filesystem(path)
        fs.makedirs(os.path.dirname(path), exist_ok=True)
        # Todo: TypeError: 'mappingproxy' object does not support item assignment
        # Ref: https://github.com/pytorch/xla/issues/2773
        if _OMEGACONF_AVAILABLE:
            checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container)
        xm.save({k: v for k, v in checkpoint.items() if k != "callbacks"}, path)
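The OmegaConf conversion step above appears to exist because ``xm.save`` chokes on ``DictConfig``/``ListConfig`` values (see the linked pytorch/xla issue). A small sketch of what that ``apply_to_collection`` call does, assuming ``omegaconf`` is installed; the config contents are invented.

from omegaconf import DictConfig, ListConfig, OmegaConf
from pytorch_lightning.utilities.apply_func import apply_to_collection

ckpt = {"hyper_parameters": OmegaConf.create({"lr": 1e-3}), "epoch": 2}  # toy checkpoint dict
plain = apply_to_collection(ckpt, (DictConfig, ListConfig), OmegaConf.to_container)
print(type(plain["hyper_parameters"]))  # <class 'dict'> -- now plain containers, safe to serialize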
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__init__.py
ADDED
@@ -0,0 +1,27 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pytorch_lightning.plugins.precision.apex_amp import ApexMixedPrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.deepspeed import DeepSpeedPrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.fully_sharded_native_amp import (  # noqa: F401
    FullyShardedNativeMixedPrecisionPlugin,
)
from pytorch_lightning.plugins.precision.hpu import HPUPrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.ipu import IPUPrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.mixed import MixedPrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.tpu import TPUPrecisionPlugin  # noqa: F401
from pytorch_lightning.plugins.precision.tpu_bf16 import TPUBf16PrecisionPlugin  # noqa: F401
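In practice these precision plugins are rarely imported by user code; the ``Trainer`` selects one from its ``precision`` flag. A minimal sketch, assuming a CUDA-capable machine for the 16-bit case.

from pytorch_lightning import Trainer

# precision=16 makes the Trainer construct a native-AMP mixed-precision plugin for the run;
# the default precision=32 corresponds to the plain PrecisionPlugin.
trainer = Trainer(accelerator="gpu", devices=1, precision=16)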
my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/apex_amp.cpython-38.pyc
ADDED
Binary file (3.74 kB).

my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/deepspeed.cpython-38.pyc
ADDED
Binary file (3.86 kB).

my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/double.cpython-38.pyc
ADDED
Binary file (3.99 kB).

my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/fully_sharded_native_amp.cpython-38.pyc
ADDED
Binary file (999 Bytes).

my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/mixed.cpython-38.pyc
ADDED
Binary file (719 Bytes).

my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/__pycache__/native_amp.cpython-38.pyc
ADDED
Binary file (4.31 kB).