File size: 2,225 Bytes
d7b3a74 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | import datetime
import logging
import os
from slime.utils.misc import SingletonMeta
try:
from torch.utils.tensorboard import SummaryWriter
except ImportError:
SummaryWriter = None
__all__ = ["_TensorboardAdapter"]
logger = logging.getLogger(__name__)
class _TensorboardAdapter(metaclass=SingletonMeta):
_writer = None
"""
# Usage example: This will return the same instance every rank
# tb = _TensorboardAdapter(args) # Initialize on first call
# tb.log({"Loss": 0.1}, step=1)
# In other files:
# from tensorboard_utils import _TensorboardAdapter
# tb = _TensorboardAdapter(args) # No parameters needed to get existing instance
# tb.log({"Accuracy": 0.9}, step=1)
"""
def __init__(self, args):
assert args.use_tensorboard, f"{args.use_tensorboard=}"
tb_project_name = args.tb_project_name
tb_experiment_name = args.tb_experiment_name
if tb_project_name is not None or os.environ.get("TENSORBOARD_DIR", None):
if tb_project_name is not None and tb_experiment_name is None:
tb_experiment_name = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
self._initialize(tb_project_name, tb_experiment_name)
else:
raise ValueError("tb_project_name and tb_experiment_name, or TENSORBOARD_DIR are required")
def _initialize(self, tb_project_name, tb_experiment_name):
"""Actual initialization logic"""
# Get tensorboard directory from environment variable or use default path
tensorboard_dir = os.environ.get("TENSORBOARD_DIR", f"tensorboard_log/{tb_project_name}/{tb_experiment_name}")
os.makedirs(tensorboard_dir, exist_ok=True)
logger.info(f"Saving tensorboard log to {tensorboard_dir}.")
self._writer = SummaryWriter(tensorboard_dir)
def log(self, data, step):
"""Log data to tensorboard
Args:
data (dict): Dictionary containing metric names and values
step (int): Current step/epoch number
"""
for key in data:
self._writer.add_scalar(key, data[key], step)
def finish(self):
"""Close the tensorboard writer"""
self._writer.close()
|