import atexit
import functools
import logging
import sys
import uuid
from typing import Any, Dict, Optional, Union

from hydra.utils import instantiate
from iopath.common.file_io import g_pathmgr
from numpy import ndarray
from torch import Tensor
from torch.utils.tensorboard import SummaryWriter

from training.utils.train_utils import get_machine_local_and_dist_rank, makedir

Scalar = Union[Tensor, ndarray, int, float]
|
|
|
|
def make_tensorboard_logger(log_dir: str, **writer_kwargs: Any):
    """Create a TensorBoardLogger whose event files are stored under `log_dir`."""
    makedir(log_dir)
    summary_writer_method = SummaryWriter
    return TensorBoardLogger(
        path=log_dir, summary_writer_method=summary_writer_method, **writer_kwargs
    )
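# A minimal usage sketch (the log directory is illustrative; `flush_secs` is a
# standard SummaryWriter keyword argument, forwarded via **writer_kwargs):
#
#   tb_logger = make_tensorboard_logger("/tmp/tb_logs", flush_secs=30)
#   tb_logger.log("train/loss", 0.5, step=0)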
|
|
|
|
class TensorBoardWriterWrapper:
    """
    A wrapper around a SummaryWriter object.
    """

    def __init__(
        self,
        path: str,
        *args: Any,
        filename_suffix: Optional[str] = None,
        summary_writer_method: Any = SummaryWriter,
        **kwargs: Any,
    ) -> None:
        """Create a new TensorBoard logger.
        On construction, the logger creates a new events file that logs
        will be written to. If the environment variable `RANK` is defined,
        the logger will only log on rank 0.

        NOTE: If using the logger with distributed training:
        - This logger can call collective operations.
        - Logs will be written on rank 0 only.
        - The logger must be constructed synchronously *after* the distributed
          process group has been initialized.

        Args:
            path (str): path to write logs to
            *args, **kwargs: extra arguments to pass to SummaryWriter
        """
        self._writer: Optional[SummaryWriter] = None
        _, self._rank = get_machine_local_and_dist_rank()
        self._path: str = path
        if self._rank == 0:
            logging.info(
                f"TensorBoard SummaryWriter instantiated. Files will be stored in: {path}"
            )
            self._writer = summary_writer_method(
                log_dir=path,
                *args,
                filename_suffix=filename_suffix or str(uuid.uuid4()),
                **kwargs,
            )
        else:
            logging.debug(
                f"Not logging meters on this host because env RANK: {self._rank} != 0"
            )
        atexit.register(self.close)

    @property
    def writer(self) -> Optional[SummaryWriter]:
        return self._writer

    @property
    def path(self) -> str:
        return self._path

    def flush(self) -> None:
        """Write pending logs to disk."""

        if not self._writer:
            return

        self._writer.flush()

    def close(self) -> None:
        """Close the writer, flushing pending logs to disk.
        Logs cannot be written after `close` is called.
        """

        if not self._writer:
            return

        self._writer.close()
        self._writer = None
|
|
|
|
class TensorBoardLogger(TensorBoardWriterWrapper):
    """
    A simple logger for TensorBoard.
    """

    def log_dict(self, payload: Dict[str, Scalar], step: int) -> None:
        """Add multiple scalar values to TensorBoard.

        Args:
            payload (dict): dictionary of tag names and scalar values
            step (int): step value to record
        """
        if not self._writer:
            return
        for k, v in payload.items():
            self.log(k, v, step)

    def log(self, name: str, data: Scalar, step: int) -> None:
        """Add scalar data to TensorBoard.

        Args:
            name (str): tag name used to group scalars
            data (float/int/Tensor): scalar data to log
            step (int): step value to record
        """
        if not self._writer:
            return
        # new_style=True stores the scalar in the newer tensor-based format,
        # which TensorBoard loads more efficiently than the legacy format.
        self._writer.add_scalar(name, data, global_step=step, new_style=True)

    def log_hparams(
        self, hparams: Dict[str, Scalar], meters: Dict[str, Scalar]
    ) -> None:
        """Add hyperparameter data to TensorBoard.

        Args:
            hparams (dict): dictionary of hyperparameter names and corresponding values
            meters (dict): dictionary of meter names and corresponding values
        """
        if not self._writer:
            return
        self._writer.add_hparams(hparams, meters)
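# For example (the tag names and values are illustrative):
#
#   tb_logger.log_hparams(
#       {"lr": 1e-3, "batch_size": 32},
#       {"meters/val_accuracy": 0.91},
#   )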
|
|
|
|
class Logger:
    """
    A logger class that can interface with multiple loggers. For simplicity it
    currently supports TensorBoard only, but it can be extended with additional
    loggers.
    """

    def __init__(self, logging_conf):
        # `should_log` is popped from the config so that the remaining keys can
        # be passed straight to hydra.utils.instantiate.
        tb_config = logging_conf.tensorboard_writer
        tb_should_log = tb_config and tb_config.pop("should_log", True)
        self.tb_logger = instantiate(tb_config) if tb_should_log else None

    def log_dict(self, payload: Dict[str, Scalar], step: int) -> None:
        if self.tb_logger:
            self.tb_logger.log_dict(payload, step)

    def log(self, name: str, data: Scalar, step: int) -> None:
        if self.tb_logger:
            self.tb_logger.log(name, data, step)

    def log_hparams(
        self, hparams: Dict[str, Scalar], meters: Dict[str, Scalar]
    ) -> None:
        if self.tb_logger:
            self.tb_logger.log_hparams(hparams, meters)
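# A sketch of the Hydra config `Logger` expects. The `_target_` module path and
# the keys besides `should_log` are assumptions; adapt them to your project:
#
#   logging:
#     tensorboard_writer:
#       _target_: training.utils.logger.make_tensorboard_logger
#       log_dir: ${paths.experiment_dir}/tensorboard  # hypothetical path key
#       should_log: True
#
# hydra.utils.instantiate resolves `_target_` to make_tensorboard_logger and
# forwards the remaining keys (e.g. `log_dir`) as its arguments.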
|
|
|
|
@functools.lru_cache(maxsize=None)
def _cached_log_stream(filename):
    # Cache the opened stream so repeated calls for the same filename share a
    # single file object instead of opening it multiple times.
    log_buffer_size = 10 * 1024  # 10 KiB write buffer, in bytes
    io = g_pathmgr.open(filename, mode="a", buffering=log_buffer_size)
    atexit.register(io.close)
    return io
|
|
|
|
def setup_logging(
    name,
    output_dir=None,
    rank=0,
    log_level_primary="INFO",
    log_level_secondary="ERROR",
):
    """
    Set up the logging streams: a stdout handler on every rank and a file
    handler on the master (rank 0) process only.
    """
    # Determine the log filename (only the master rank logs to a file).
    log_filename = None
    if output_dir:
        makedir(output_dir)
        if rank == 0:
            log_filename = f"{output_dir}/log.txt"

    logger = logging.getLogger(name)
    logger.setLevel(log_level_primary)

    # Create the formatter shared by all handlers.
    FORMAT = "%(levelname)s %(asctime)s %(filename)s:%(lineno)4d: %(message)s"
    formatter = logging.Formatter(FORMAT)

    # Clean up any pre-existing handlers. Iterate over a copy of the list,
    # since removeHandler mutates it.
    for h in list(logger.handlers):
        logger.removeHandler(h)
    logger.root.handlers = []

    # Set up the console handler.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    if rank == 0:
        console_handler.setLevel(log_level_primary)
    else:
        console_handler.setLevel(log_level_secondary)

    # Set up the file handler on the master rank only.
    if log_filename and rank == 0:
        file_handler = logging.StreamHandler(_cached_log_stream(log_filename))
        file_handler.setLevel(log_level_primary)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Route module-level logging.* calls through this logger as well.
    logging.root = logger
|
|
|
|
def shutdown_logging():
    """
    After training is done, ensure that all the logger streams are shut down.
    """
    logging.info("Shutting down loggers...")
    handlers = logging.root.handlers
    for handler in handlers:
        handler.close()
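# End-to-end wiring, as a sketch (`cfg` and its keys are hypothetical):
#
#   setup_logging(
#       __name__,
#       output_dir=cfg.experiment_log_dir,
#       rank=get_machine_local_and_dist_rank()[1],
#   )
#   logger = Logger(cfg.logging)
#   logger.log("Losses/train_loss", loss_value, step)
#   shutdown_logging()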
|