bellmake's picture
SAM3 Video Segmentation - Clean deployment
14114e8
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
import atexit
import functools
import logging
import sys
import uuid
from typing import Any, Dict, Optional, Union
from hydra.utils import instantiate
from iopath.common.file_io import g_pathmgr
from numpy import ndarray
from sam3.train.utils.train_utils import get_machine_local_and_dist_rank, makedir
from torch import Tensor
from torch.utils.tensorboard import SummaryWriter
Scalar = Union[Tensor, ndarray, int, float]
def make_tensorboard_logger(log_dir: str, **writer_kwargs: Any):
makedir(log_dir)
summary_writer_method = SummaryWriter
return TensorBoardLogger(
path=log_dir, summary_writer_method=summary_writer_method, **writer_kwargs
)
class TensorBoardWriterWrapper:
"""
A wrapper around a SummaryWriter object.
"""
def __init__(
self,
path: str,
*args: Any,
filename_suffix: str = None,
summary_writer_method: Any = SummaryWriter,
**kwargs: Any,
) -> None:
"""Create a new TensorBoard logger.
On construction, the logger creates a new events file that logs
will be written to. If the environment variable `RANK` is defined,
logger will only log if RANK = 0.
NOTE: If using the logger with distributed training:
- This logger can call collective operations
- Logs will be written on rank 0 only
- Logger must be constructed synchronously *after* initializing distributed process group.
Args:
path (str): path to write logs to
*args, **kwargs: Extra arguments to pass to SummaryWriter
"""
self._writer: Optional[SummaryWriter] = None
_, self._rank = get_machine_local_and_dist_rank()
self._path: str = path
if self._rank == 0:
logging.info(
f"TensorBoard SummaryWriter instantiated. Files will be stored in: {path}"
)
self._writer = summary_writer_method(
log_dir=path,
*args,
filename_suffix=filename_suffix or str(uuid.uuid4()),
**kwargs,
)
else:
logging.debug(
f"Not logging meters on this host because env RANK: {self._rank} != 0"
)
atexit.register(self.close)
@property
def writer(self) -> Optional[SummaryWriter]:
return self._writer
@property
def path(self) -> str:
return self._path
def flush(self) -> None:
"""Writes pending logs to disk."""
if not self._writer:
return
self._writer.flush()
def close(self) -> None:
"""Close writer, flushing pending logs to disk.
Logs cannot be written after `close` is called.
"""
if not self._writer:
return
self._writer.close()
self._writer = None
class TensorBoardLogger(TensorBoardWriterWrapper):
"""
A simple logger for TensorBoard.
"""
def log_dict(self, payload: Dict[str, Scalar], step: int) -> None:
"""Add multiple scalar values to TensorBoard.
Args:
payload (dict): dictionary of tag name and scalar value
step (int, Optional): step value to record
"""
if not self._writer:
return
for k, v in payload.items():
self.log(k, v, step)
def log(self, name: str, data: Scalar, step: int) -> None:
"""Add scalar data to TensorBoard.
Args:
name (string): tag name used to group scalars
data (float/int/Tensor): scalar data to log
step (int, optional): step value to record
"""
if not self._writer:
return
self._writer.add_scalar(name, data, global_step=step, new_style=True)
def log_hparams(
self, hparams: Dict[str, Scalar], meters: Dict[str, Scalar]
) -> None:
"""Add hyperparameter data to TensorBoard.
Args:
hparams (dict): dictionary of hyperparameter names and corresponding values
meters (dict): dictionary of name of meter and corersponding values
"""
if not self._writer:
return
self._writer.add_hparams(hparams, meters)
class Logger:
"""
A logger class that can interface with multiple loggers. It now supports tensorboard only for simplicity, but you can extend it with your own logger.
"""
def __init__(self, logging_conf):
# allow turning off TensorBoard with "should_log: false" in config
tb_config = logging_conf.tensorboard_writer
tb_should_log = tb_config and tb_config.pop("should_log", True)
self.tb_logger = instantiate(tb_config) if tb_should_log else None
def log_dict(self, payload: Dict[str, Scalar], step: int) -> None:
if self.tb_logger:
self.tb_logger.log_dict(payload, step)
def log(self, name: str, data: Scalar, step: int) -> None:
if self.tb_logger:
self.tb_logger.log(name, data, step)
def log_hparams(
self, hparams: Dict[str, Scalar], meters: Dict[str, Scalar]
) -> None:
if self.tb_logger:
self.tb_logger.log_hparams(hparams, meters)
# cache the opened file object, so that different calls to `setup_logger`
# with the same file name can safely write to the same file.
@functools.lru_cache(maxsize=None)
def _cached_log_stream(filename):
# we tune the buffering value so that the logs are updated
# frequently.
log_buffer_kb = 10 * 1024 # 10KB
io = g_pathmgr.open(filename, mode="a", buffering=log_buffer_kb)
atexit.register(io.close)
return io
def setup_logging(
name,
output_dir=None,
rank=0,
log_level_primary="INFO",
log_level_secondary="ERROR",
):
"""
Setup various logging streams: stdout and file handlers.
For file handlers, we only setup for the master gpu.
"""
# get the filename if we want to log to the file as well
log_filename = None
if output_dir:
makedir(output_dir)
if rank == 0:
log_filename = f"{output_dir}/log.txt"
logger = logging.getLogger(name)
logger.setLevel(log_level_primary)
# create formatter
FORMAT = "%(levelname)s %(asctime)s %(filename)s:%(lineno)4d: %(message)s"
formatter = logging.Formatter(FORMAT)
# Cleanup any existing handlers
for h in logger.handlers:
logger.removeHandler(h)
logger.root.handlers = []
# setup the console handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)
if rank == 0:
console_handler.setLevel(log_level_primary)
else:
console_handler.setLevel(log_level_secondary)
# we log to file as well if user wants
if log_filename and rank == 0:
file_handler = logging.StreamHandler(_cached_log_stream(log_filename))
file_handler.setLevel(log_level_primary)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logging.root = logger
def shutdown_logging():
"""
After training is done, we ensure to shut down all the logger streams.
"""
logging.info("Shutting down loggers...")
handlers = logging.root.handlers
for handler in handlers:
handler.close()