| | |
| | import datetime |
| | import json |
| | import logging |
| | import os |
| | import time |
| | from collections import defaultdict |
| | from contextlib import contextmanager |
| | from functools import cached_property |
| | from typing import Optional |
| | import torch |
| | from fvcore.common.history_buffer import HistoryBuffer |
| |
|
| | from detectron2.utils.file_io import PathManager |
| |
|
| | __all__ = [ |
| | "get_event_storage", |
| | "has_event_storage", |
| | "JSONWriter", |
| | "TensorboardXWriter", |
| | "CommonMetricPrinter", |
| | "EventStorage", |
| | ] |
| |
|
| | _CURRENT_STORAGE_STACK = [] |
| |
|
| |
|
| | def get_event_storage(): |
| | """ |
| | Returns: |
| | The :class:`EventStorage` object that's currently being used. |
| | Throws an error if no :class:`EventStorage` is currently enabled. |
| | """ |
| | assert len( |
| | _CURRENT_STORAGE_STACK |
| | ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" |
| | return _CURRENT_STORAGE_STACK[-1] |
| |
|
| |
|
| | def has_event_storage(): |
| | """ |
| | Returns: |
| | Check if there are EventStorage() context existed. |
| | """ |
| | return len(_CURRENT_STORAGE_STACK) > 0 |
| |
|
| |
|
| | class EventWriter: |
| | """ |
| | Base class for writers that obtain events from :class:`EventStorage` and process them. |
| | """ |
| |
|
| | def write(self): |
| | raise NotImplementedError |
| |
|
| | def close(self): |
| | pass |
| |
|
| |
|
| | class JSONWriter(EventWriter): |
| | """ |
| | Write scalars to a json file. |
| | |
| | It saves scalars as one json per line (instead of a big json) for easy parsing. |
| | |
| | Examples parsing such a json file: |
| | :: |
| | $ cat metrics.json | jq -s '.[0:2]' |
| | [ |
| | { |
| | "data_time": 0.008433341979980469, |
| | "iteration": 19, |
| | "loss": 1.9228371381759644, |
| | "loss_box_reg": 0.050025828182697296, |
| | "loss_classifier": 0.5316952466964722, |
| | "loss_mask": 0.7236229181289673, |
| | "loss_rpn_box": 0.0856662318110466, |
| | "loss_rpn_cls": 0.48198649287223816, |
| | "lr": 0.007173333333333333, |
| | "time": 0.25401854515075684 |
| | }, |
| | { |
| | "data_time": 0.007216215133666992, |
| | "iteration": 39, |
| | "loss": 1.282649278640747, |
| | "loss_box_reg": 0.06222952902317047, |
| | "loss_classifier": 0.30682939291000366, |
| | "loss_mask": 0.6970193982124329, |
| | "loss_rpn_box": 0.038663312792778015, |
| | "loss_rpn_cls": 0.1471673548221588, |
| | "lr": 0.007706666666666667, |
| | "time": 0.2490077018737793 |
| | } |
| | ] |
| | |
| | $ cat metrics.json | jq '.loss_mask' |
| | 0.7126231789588928 |
| | 0.689423680305481 |
| | 0.6776131987571716 |
| | ... |
| | |
| | """ |
| |
|
| | def __init__(self, json_file, window_size=20): |
| | """ |
| | Args: |
| | json_file (str): path to the json file. New data will be appended if the file exists. |
| | window_size (int): the window size of median smoothing for the scalars whose |
| | `smoothing_hint` are True. |
| | """ |
| | self._file_handle = PathManager.open(json_file, "a") |
| | self._window_size = window_size |
| | self._last_write = -1 |
| |
|
| | def write(self): |
| | storage = get_event_storage() |
| | to_save = defaultdict(dict) |
| |
|
| | for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): |
| | |
| | if iter <= self._last_write: |
| | continue |
| | to_save[iter][k] = v |
| | if len(to_save): |
| | all_iters = sorted(to_save.keys()) |
| | self._last_write = max(all_iters) |
| |
|
| | for itr, scalars_per_iter in to_save.items(): |
| | scalars_per_iter["iteration"] = itr |
| | self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n") |
| | self._file_handle.flush() |
| | try: |
| | os.fsync(self._file_handle.fileno()) |
| | except AttributeError: |
| | pass |
| |
|
| | def close(self): |
| | self._file_handle.close() |
| |
|
| |
|
| | class TensorboardXWriter(EventWriter): |
| | """ |
| | Write all scalars to a tensorboard file. |
| | """ |
| |
|
| | def __init__(self, log_dir: str, window_size: int = 20, **kwargs): |
| | """ |
| | Args: |
| | log_dir (str): the directory to save the output events |
| | window_size (int): the scalars will be median-smoothed by this window size |
| | |
| | kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` |
| | """ |
| | self._window_size = window_size |
| | self._writer_args = {"log_dir": log_dir, **kwargs} |
| | self._last_write = -1 |
| |
|
| | @cached_property |
| | def _writer(self): |
| | from torch.utils.tensorboard import SummaryWriter |
| |
|
| | return SummaryWriter(**self._writer_args) |
| |
|
| | def write(self): |
| | storage = get_event_storage() |
| | new_last_write = self._last_write |
| | for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): |
| | if iter > self._last_write: |
| | self._writer.add_scalar(k, v, iter) |
| | new_last_write = max(new_last_write, iter) |
| | self._last_write = new_last_write |
| |
|
| | |
| | |
| | if len(storage._vis_data) >= 1: |
| | for img_name, img, step_num in storage._vis_data: |
| | self._writer.add_image(img_name, img, step_num) |
| | |
| | |
| | |
| | |
| | |
| | storage.clear_images() |
| |
|
| | if len(storage._histograms) >= 1: |
| | for params in storage._histograms: |
| | self._writer.add_histogram_raw(**params) |
| | storage.clear_histograms() |
| |
|
| | def close(self): |
| | if "_writer" in self.__dict__: |
| | self._writer.close() |
| |
|
| |
|
| | class CommonMetricPrinter(EventWriter): |
| | """ |
| | Print **common** metrics to the terminal, including |
| | iteration time, ETA, memory, all losses, and the learning rate. |
| | It also applies smoothing using a window of 20 elements. |
| | |
| | It's meant to print common metrics in common ways. |
| | To print something in more customized ways, please implement a similar printer by yourself. |
| | """ |
| |
|
| | def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): |
| | """ |
| | Args: |
| | max_iter: the maximum number of iterations to train. |
| | Used to compute ETA. If not given, ETA will not be printed. |
| | window_size (int): the losses will be median-smoothed by this window size |
| | """ |
| | self.logger = logging.getLogger("detectron2.utils.events") |
| | self._max_iter = max_iter |
| | self._window_size = window_size |
| | self._last_write = None |
| |
|
| | def _get_eta(self, storage) -> Optional[str]: |
| | if self._max_iter is None: |
| | return "" |
| | iteration = storage.iter |
| | try: |
| | eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration - 1) |
| | storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) |
| | return str(datetime.timedelta(seconds=int(eta_seconds))) |
| | except KeyError: |
| | |
| | eta_string = None |
| | if self._last_write is not None: |
| | estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( |
| | iteration - self._last_write[0] |
| | ) |
| | eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) |
| | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) |
| | self._last_write = (iteration, time.perf_counter()) |
| | return eta_string |
| |
|
| | def write(self): |
| | storage = get_event_storage() |
| | iteration = storage.iter |
| | if iteration == self._max_iter: |
| | |
| | |
| | |
| | return |
| |
|
| | try: |
| | avg_data_time = storage.history("data_time").avg( |
| | storage.count_samples("data_time", self._window_size) |
| | ) |
| | last_data_time = storage.history("data_time").latest() |
| | except KeyError: |
| | |
| | |
| | avg_data_time = None |
| | last_data_time = None |
| | try: |
| | avg_iter_time = storage.history("time").global_avg() |
| | last_iter_time = storage.history("time").latest() |
| | except KeyError: |
| | avg_iter_time = None |
| | last_iter_time = None |
| | try: |
| | lr = "{:.5g}".format(storage.history("lr").latest()) |
| | except KeyError: |
| | lr = "N/A" |
| |
|
| | eta_string = self._get_eta(storage) |
| |
|
| | if torch.cuda.is_available(): |
| | max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 |
| | else: |
| | max_mem_mb = None |
| |
|
| | |
| | self.logger.info( |
| | str.format( |
| | " {eta}iter: {iter} {losses} {non_losses} {avg_time}{last_time}" |
| | + "{avg_data_time}{last_data_time} lr: {lr} {memory}", |
| | eta=f"eta: {eta_string} " if eta_string else "", |
| | iter=iteration, |
| | losses=" ".join( |
| | [ |
| | "{}: {:.4g}".format( |
| | k, v.median(storage.count_samples(k, self._window_size)) |
| | ) |
| | for k, v in storage.histories().items() |
| | if "loss" in k |
| | ] |
| | ), |
| | non_losses=" ".join( |
| | [ |
| | "{}: {:.4g}".format( |
| | k, v.median(storage.count_samples(k, self._window_size)) |
| | ) |
| | for k, v in storage.histories().items() |
| | if "[metric]" in k |
| | ] |
| | ), |
| | avg_time=( |
| | "time: {:.4f} ".format(avg_iter_time) if avg_iter_time is not None else "" |
| | ), |
| | last_time=( |
| | "last_time: {:.4f} ".format(last_iter_time) |
| | if last_iter_time is not None |
| | else "" |
| | ), |
| | avg_data_time=( |
| | "data_time: {:.4f} ".format(avg_data_time) if avg_data_time is not None else "" |
| | ), |
| | last_data_time=( |
| | "last_data_time: {:.4f} ".format(last_data_time) |
| | if last_data_time is not None |
| | else "" |
| | ), |
| | lr=lr, |
| | memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", |
| | ) |
| | ) |
| |
|
| |
|
| | class EventStorage: |
| | """ |
| | The user-facing class that provides metric storage functionalities. |
| | |
| | In the future we may add support for storing / logging other types of data if needed. |
| | """ |
| |
|
| | def __init__(self, start_iter=0): |
| | """ |
| | Args: |
| | start_iter (int): the iteration number to start with |
| | """ |
| | self._history = defaultdict(HistoryBuffer) |
| | self._smoothing_hints = {} |
| | self._latest_scalars = {} |
| | self._iter = start_iter |
| | self._current_prefix = "" |
| | self._vis_data = [] |
| | self._histograms = [] |
| |
|
| | def put_image(self, img_name, img_tensor): |
| | """ |
| | Add an `img_tensor` associated with `img_name`, to be shown on |
| | tensorboard. |
| | |
| | Args: |
| | img_name (str): The name of the image to put into tensorboard. |
| | img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` |
| | Tensor of shape `[channel, height, width]` where `channel` is |
| | 3. The image format should be RGB. The elements in img_tensor |
| | can either have values in [0, 1] (float32) or [0, 255] (uint8). |
| | The `img_tensor` will be visualized in tensorboard. |
| | """ |
| | self._vis_data.append((img_name, img_tensor, self._iter)) |
| |
|
| | def put_scalar(self, name, value, smoothing_hint=True, cur_iter=None): |
| | """ |
| | Add a scalar `value` to the `HistoryBuffer` associated with `name`. |
| | |
| | Args: |
| | smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be |
| | smoothed when logged. The hint will be accessible through |
| | :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint |
| | and apply custom smoothing rule. |
| | |
| | It defaults to True because most scalars we save need to be smoothed to |
| | provide any useful signal. |
| | cur_iter (int): an iteration number to set explicitly instead of current iteration |
| | """ |
| | name = self._current_prefix + name |
| | cur_iter = self._iter if cur_iter is None else cur_iter |
| | history = self._history[name] |
| | value = float(value) |
| | history.update(value, cur_iter) |
| | self._latest_scalars[name] = (value, cur_iter) |
| |
|
| | existing_hint = self._smoothing_hints.get(name) |
| |
|
| | if existing_hint is not None: |
| | assert ( |
| | existing_hint == smoothing_hint |
| | ), "Scalar {} was put with a different smoothing_hint!".format(name) |
| | else: |
| | self._smoothing_hints[name] = smoothing_hint |
| |
|
| | def put_scalars(self, *, smoothing_hint=True, cur_iter=None, **kwargs): |
| | """ |
| | Put multiple scalars from keyword arguments. |
| | |
| | Examples: |
| | |
| | storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) |
| | """ |
| | for k, v in kwargs.items(): |
| | self.put_scalar(k, v, smoothing_hint=smoothing_hint, cur_iter=cur_iter) |
| |
|
| | def put_histogram(self, hist_name, hist_tensor, bins=1000): |
| | """ |
| | Create a histogram from a tensor. |
| | |
| | Args: |
| | hist_name (str): The name of the histogram to put into tensorboard. |
| | hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted |
| | into a histogram. |
| | bins (int): Number of histogram bins. |
| | """ |
| | ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() |
| |
|
| | |
| | hist_counts = torch.histc(hist_tensor, bins=bins) |
| | hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) |
| |
|
| | |
| | hist_params = dict( |
| | tag=hist_name, |
| | min=ht_min, |
| | max=ht_max, |
| | num=len(hist_tensor), |
| | sum=float(hist_tensor.sum()), |
| | sum_squares=float(torch.sum(hist_tensor**2)), |
| | bucket_limits=hist_edges[1:].tolist(), |
| | bucket_counts=hist_counts.tolist(), |
| | global_step=self._iter, |
| | ) |
| | self._histograms.append(hist_params) |
| |
|
| | def history(self, name): |
| | """ |
| | Returns: |
| | HistoryBuffer: the scalar history for name |
| | """ |
| | ret = self._history.get(name, None) |
| | if ret is None: |
| | raise KeyError("No history metric available for {}!".format(name)) |
| | return ret |
| |
|
| | def histories(self): |
| | """ |
| | Returns: |
| | dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars |
| | """ |
| | return self._history |
| |
|
| | def latest(self): |
| | """ |
| | Returns: |
| | dict[str -> (float, int)]: mapping from the name of each scalar to the most |
| | recent value and the iteration number its added. |
| | """ |
| | return self._latest_scalars |
| |
|
| | def latest_with_smoothing_hint(self, window_size=20): |
| | """ |
| | Similar to :meth:`latest`, but the returned values |
| | are either the un-smoothed original latest value, |
| | or a median of the given window_size, |
| | depend on whether the smoothing_hint is True. |
| | |
| | This provides a default behavior that other writers can use. |
| | |
| | Note: All scalars saved in the past `window_size` iterations are used for smoothing. |
| | This is different from the `window_size` definition in HistoryBuffer. |
| | Use :meth:`get_history_window_size` to get the `window_size` used in HistoryBuffer. |
| | """ |
| | result = {} |
| | for k, (v, itr) in self._latest_scalars.items(): |
| | result[k] = ( |
| | ( |
| | self._history[k].median(self.count_samples(k, window_size)) |
| | if self._smoothing_hints[k] |
| | else v |
| | ), |
| | itr, |
| | ) |
| | return result |
| |
|
| | def count_samples(self, name, window_size=20): |
| | """ |
| | Return the number of samples logged in the past `window_size` iterations. |
| | """ |
| | samples = 0 |
| | data = self._history[name].values() |
| | for _, iter_ in reversed(data): |
| | if iter_ > data[-1][1] - window_size: |
| | samples += 1 |
| | else: |
| | break |
| | return samples |
| |
|
| | def smoothing_hints(self): |
| | """ |
| | Returns: |
| | dict[name -> bool]: the user-provided hint on whether the scalar |
| | is noisy and needs smoothing. |
| | """ |
| | return self._smoothing_hints |
| |
|
| | def step(self): |
| | """ |
| | User should either: (1) Call this function to increment storage.iter when needed. Or |
| | (2) Set `storage.iter` to the correct iteration number before each iteration. |
| | |
| | The storage will then be able to associate the new data with an iteration number. |
| | """ |
| | self._iter += 1 |
| |
|
| | @property |
| | def iter(self): |
| | """ |
| | Returns: |
| | int: The current iteration number. When used together with a trainer, |
| | this is ensured to be the same as trainer.iter. |
| | """ |
| | return self._iter |
| |
|
| | @iter.setter |
| | def iter(self, val): |
| | self._iter = int(val) |
| |
|
| | @property |
| | def iteration(self): |
| | |
| | return self._iter |
| |
|
| | def __enter__(self): |
| | _CURRENT_STORAGE_STACK.append(self) |
| | return self |
| |
|
| | def __exit__(self, exc_type, exc_val, exc_tb): |
| | assert _CURRENT_STORAGE_STACK[-1] == self |
| | _CURRENT_STORAGE_STACK.pop() |
| |
|
| | @contextmanager |
| | def name_scope(self, name): |
| | """ |
| | Yields: |
| | A context within which all the events added to this storage |
| | will be prefixed by the name scope. |
| | """ |
| | old_prefix = self._current_prefix |
| | self._current_prefix = name.rstrip("/") + "/" |
| | yield |
| | self._current_prefix = old_prefix |
| |
|
| | def clear_images(self): |
| | """ |
| | Delete all the stored images for visualization. This should be called |
| | after images are written to tensorboard. |
| | """ |
| | self._vis_data = [] |
| |
|
| | def clear_histograms(self): |
| | """ |
| | Delete all the stored histograms for visualization. |
| | This should be called after histograms are written to tensorboard. |
| | """ |
| | self._histograms = [] |
| |
|