# E2E_SCSI / train_utils / logger.py
# kungchuking: "Copied from github repository." (revision 2c76547)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import logging
import os
from torch.utils.tensorboard import SummaryWriter
class Logger:
    """Accumulate training metrics and report them via ``logging`` and TensorBoard.

    Metric values are summed per key with :meth:`push`. :meth:`update`
    advances the step counter and, on a fixed period, logs the sums divided
    by ``SUM_FREQ`` and writes them as TensorBoard scalars.
    """

    # Flush period (in calls to ``update``) and the divisor used for averaging.
    SUM_FREQ = 100

    def __init__(self, model, scheduler, ckpt_path):
        """Store the given objects and open a TensorBoard writer.

        Args:
            model: Stored on the instance; not used by the methods of this class.
            scheduler: Stored on the instance; not used by the methods of this class.
            ckpt_path: Directory under which the ``runs`` TensorBoard log
                directory is created.
        """
        self.model = model
        self.scheduler = scheduler
        self.total_steps = 0
        self.running_loss = {}
        self.ckpt_path = ckpt_path
        self.writer = None
        self._ensure_writer()

    def _ensure_writer(self):
        """Return the TensorBoard writer, creating it if there is none."""
        if self.writer is None:
            self.writer = SummaryWriter(log_dir=os.path.join(self.ckpt_path, "runs"))
        return self.writer

    def _print_training_status(self):
        """Log the averaged running metrics and write them to TensorBoard.

        Every accumulated sum is divided by ``SUM_FREQ``. The log line lists
        the values in sorted key order; each value is also written as a
        scalar at step ``total_steps``. The accumulators are reset to zero.
        """
        # Compute each average once; it is used for both the log line and
        # the TensorBoard scalar.
        averages = {
            name: total / Logger.SUM_FREQ
            for name, total in self.running_loss.items()
        }
        metrics_data = [averages[name] for name in sorted(averages)]
        training_str = "[{:6d}] ".format(self.total_steps + 1)
        metrics_str = ("{:10.4f}, " * len(metrics_data)).format(*metrics_data)

        # print the training status
        logging.info(
            f"Training Metrics ({self.total_steps}): {training_str + metrics_str}"
        )

        writer = self._ensure_writer()
        for name, average in averages.items():
            writer.add_scalar(name, average, self.total_steps)
            self.running_loss[name] = 0.0

    def push(self, metrics, task):
        """Add ``metrics`` to the running sums for ``task``.

        Args:
            metrics: Mapping from metric name to a value supporting ``+``.
            task: Label appended to every metric name as ``"<name>_<task>"``.
        """
        for key in metrics:
            task_key = f"{key}_{task}"
            self.running_loss[task_key] = (
                self.running_loss.get(task_key, 0.0) + metrics[key]
            )

    def update(self):
        """Advance the step counter and flush the metrics when a period ends."""
        self.total_steps += 1
        # NOTE(review): this fires at steps SUM_FREQ - 1, 2 * SUM_FREQ - 1, ...
        # so the first window holds one update fewer than the SUM_FREQ divisor
        # used for averaging. Left as-is to keep the existing step schedule.
        if self.total_steps % Logger.SUM_FREQ == Logger.SUM_FREQ - 1:
            self._print_training_status()
            self.running_loss = {}

    def write_dict(self, results):
        """Write every ``key -> value`` in ``results`` as a scalar at ``total_steps``."""
        writer = self._ensure_writer()
        for key in results:
            writer.add_scalar(key, results[key], self.total_steps)

    def close(self):
        """Close the TensorBoard writer if one is open.

        Safe to call when no writer exists. The writer is dropped afterwards,
        so a later write creates a new one instead of reusing a closed handle.
        """
        if self.writer is not None:
            self.writer.close()
            self.writer = None