import torch
import logging
from typing import Any, List, Tuple, Dict
from torchmetrics import MaxMetric, MeanMetric
from torchmetrics.classification import BinaryAccuracy, BinaryF1Score
from torch_geometric.nn.pool.consecutive import consecutive_cluster
from src.utils import init_weights, PanopticSegmentationOutput, \
    PartitionParameterSearchStorage
from src.metrics import MeanAveragePrecision3D, PanopticQuality3D, \
    ConfusionMatrix
from src.models.semantic import SemanticSegmentationModule
from src.loss import BCEWithLogitsLoss
from src.data import NAG


log = logging.getLogger(__name__)


__all__ = ['PanopticSegmentationModule']


class PanopticSegmentationModule(SemanticSegmentationModule):
    """A LightningModule for panoptic segmentation of point clouds.

    :param net: torch.nn.Module
        Backbone model. This can typically be an `SPT` object
    :param edge_affinity_head: torch.nn.Module
        Edge affinity prediction head for instance/panoptic graph
        clustering. This is typically an MLP
    :param partitioner: src.nn.instance.InstancePartitioner
        Instance partition head, expects a fully-fledged
        `InstancePartitioner` module as input. This module is only
        called when the actual instance/panoptic segmentation is
        required. At train time, it is not essential, since we do not
        propagate gradient to its parameters. However, we may still
        tune its parameters to maximize instance/panoptic metrics on
        the train set. This tuning involves a simple grid-search on a
        small range of parameters and needs to be called at least once
        at the very end of training
    :param criterion: torch.nn._Loss
        Loss
    :param optimizer: torch.optim.Optimizer
        Optimizer
    :param scheduler: torch.optim.lr_scheduler.LRScheduler
        Learning rate scheduler
    :param num_classes: int
        Number of classes in the dataset
    :param stuff_classes: List[int]
        Indices of the classes to be treated as 'stuff', as opposed to
        'thing'
    :param class_names: List[str]
        Name for each class
    :param sampling_loss: bool
        If True, the target labels will be obtained from labels of the
        points sampled in the batch at hand. This affects training
        supervision where sampling augmentations may be used for
        dropping some points or superpoints. If False, the target
        labels will be based on exact superpoint-wise histograms of
        labels computed at preprocessing time, disregarding potential
        level-0 point down-sampling
    :param loss_type: str
        Type of loss applied.
        'ce': cross-entropy (if `multi_stage_loss_lambdas` is used, all
        1+ levels will be supervised with cross-entropy).
        'kl': Kullback-Leibler divergence (if `multi_stage_loss_lambdas`
        is used, all 1+ levels will be supervised with cross-entropy).
        'ce_kl': cross-entropy on level 1 and Kullback-Leibler for all
        levels above
        'wce': not documented for now
        'wce_kl': not documented for now
    :param weighted_loss: bool
        If True, the loss will be weighted based on the class
        frequencies computed on the train dataset. See
        `BaseDataset.get_class_weight()` for more
    :param init_linear: str
        Initialization method for all linear layers. Supports
        'xavier_uniform', 'xavier_normal', 'kaiming_uniform',
        'kaiming_normal', 'trunc_normal'
    :param init_rpe: str
        Initialization method for all linear layers producing relative
        positional encodings. Supports 'xavier_uniform',
        'xavier_normal', 'kaiming_uniform', 'kaiming_normal',
        'trunc_normal'
    :param transformer_lr_scale: float
        Scaling parameter applied to the learning rate for the
        `TransformerBlock` in each `Stage` and for the pooling block in
        `DownNFuseStage` modules. Setting this to a value lower than 1
        mitigates exploding gradients in attentive blocks during
        training
    :param multi_stage_loss_lambdas: List[float]
        List of weights for combining losses computed on the output of
        each partition level. If not specified, the loss will be
        computed on the level 1 outputs only
    :param edge_affinity_criterion: torch.nn._Loss
        Loss on the edges of the superpoint level 1 for affinity
        prediction. Defaults to `BCEWithLogitsLoss()` when None
    :param edge_affinity_loss_weights: List[float]
        Weights for insisting on certain cases in the edge affinity
        loss:
            - 0: same-class same-object edges
            - 1: same-class different-object edges
            - 2: different-class same-object edges
            - 3: different-class different-object edges
    :param edge_affinity_loss_lambda: float
        Weight for combining the semantic segmentation loss with the
        node offset and edge affinity losses. The final loss will be:
        `L_node_classif + edge_affinity_loss_lambda * L_edge_affinity + node_offset_loss_lambda * L_node_offset`
    :param node_offset_criterion: torch.nn._Loss
        Loss on the nodes of the superpoint level 1 for node offset
        prediction
    :param node_offset_loss_lambda: float
        Weight for combining the semantic segmentation loss with the
        node offset and edge affinity losses. The final loss will be:
        `L_node_classif + edge_affinity_loss_lambda * L_edge_affinity + node_offset_loss_lambda * L_node_offset`
    :param gc_every_n_steps: int
        Explicitly call the garbage collector after a certain number of
        steps. May involve a computation overhead. Mostly here for
        debugging purposes when observing suspicious GPU memory
        increase during training
    :param track_val_every_n_epoch: int
        If specified, the output for a validation batch of interest
        specified with `track_val_idx` will be stored to disk every
        `track_val_every_n_epoch` epochs. Must be a multiple of
        `check_val_every_n_epoch`. See `track_batch()` for more
    :param track_val_idx: int
        If specified, the output for the `track_val_idx`th validation
        batch will be saved to disk periodically based on
        `track_val_every_n_epoch`. Importantly, this index is expected
        to match the `Dataloader`'s index wrt the current epoch and NOT
        an index wrt the `Dataset`. Said otherwise, if the
        `Dataloader(shuffle=True)` then, the stored batch will not be
        the same at each epoch. For this reason, if tracking the same
        object across training is needed, the `Dataloader` and the
        transforms should be free from any stochasticity
    :param track_test_idx:
        If specified, the output for the `track_test_idx`th test batch
        will be saved to disk. If `track_test_idx=-1`, predictions for
        the entire test set will be saved to disk
    :param min_instance_size: int
        Minimum target instance size to consider when computing the
        metrics. If a target is smaller, it will be ignored, as well as
        its matched prediction, if any. See `MeanAveragePrecision3D`
    :param partition_every_n_epoch: int
        Since we do not need to compute the actual panoptic/instance
        segmentation to train the model, we can simply do so once in a
        while to track the training and validation metrics. This
        parameter rules the frequency at which the panoptic/instance
        partition and metrics are computed during training
    :param no_instance_metrics: bool
        If True, instance segmentation metrics will NOT be computed
        (see `needs_instance`). These may incur an overhead. Besides,
        the SuperCluster formulation is mainly targeted for panoptic
        segmentation, as the model is not specifically trained to
        maximize instance metrics, which, among other things, involve
        predicting an instance confidence score
    :param no_instance_metrics_on_train_set: bool
        Same as `no_instance_metrics` but specifically for the train
        set. This is in case we still want the instance metrics every
        `partition_every_n_epoch` on the validation set, but want to
        avoid the compute overhead of computing the instance partition
        and metrics at every single training epoch
    :param kwargs: Dict
        Kwargs will be passed to `_load_from_checkpoint()`. NB: in this
        constructor, they are forwarded to the parent constructor as
        well as to the `PanopticQuality3D` and `MeanAveragePrecision3D`
        metric constructors
    """

    _IGNORED_HYPERPARAMETERS = [
        'net',
        'edge_affinity_head',
        'partitioner',
        'criterion',
        'edge_affinity_criterion',
        'node_offset_criterion']

    def __init__(
            self,
            net: torch.nn.Module,
            edge_affinity_head: torch.nn.Module,
            partitioner: 'InstancePartitioner',
            criterion: 'torch.nn._Loss',
            optimizer: torch.optim.Optimizer,
            scheduler: torch.optim.lr_scheduler.LRScheduler,
            num_classes: int,
            stuff_classes: List[int],
            class_names: List[str] = None,
            sampling_loss: bool = False,
            loss_type: str = 'ce_kl',
            weighted_loss: bool = True,
            init_linear: str = None,
            init_rpe: str = None,
            transformer_lr_scale: float = 1,
            multi_stage_loss_lambdas: List[float] = None,
            edge_affinity_criterion: 'torch.nn._Loss' = None,
            edge_affinity_loss_weights: List[float] = None,
            edge_affinity_loss_lambda: float = 1,
            node_offset_criterion: 'torch.nn._Loss' = None,
            node_offset_loss_lambda: float = 1,
            gc_every_n_steps: int = 0,
            track_val_every_n_epoch: int = 1,
            track_val_idx: int = None,
            track_test_idx: int = None,
            min_instance_size: int = 100,
            partition_every_n_epoch: int = 50,
            no_instance_metrics: bool = True,
            no_instance_metrics_on_train_set: bool = True,
            **kwargs):
        """Build the panoptic module on top of the semantic one.

        See the class docstring for the description of each parameter.
        """
        super().__init__(
            net,
            criterion,
            optimizer,
            scheduler,
            num_classes,
            class_names=class_names,
            sampling_loss=sampling_loss,
            loss_type=loss_type,
            weighted_loss=weighted_loss,
            init_linear=init_linear,
            init_rpe=init_rpe,
            transformer_lr_scale=transformer_lr_scale,
            multi_stage_loss_lambdas=multi_stage_loss_lambdas,
            gc_every_n_steps=gc_every_n_steps,
            track_val_every_n_epoch=track_val_every_n_epoch,
            track_val_idx=track_val_idx,
            track_test_idx=track_test_idx,
            **kwargs)

        # Instance partition head, expects a fully-fledged
        # InstancePartitioner module as input.
        # This module is only called when the actual instance/panoptic
        # segmentation is required. At train time, it is not essential,
        # since we do not propagate gradient to its parameters. However,
        # we still tune its parameters to maximize instance/panoptic
        # metrics on the train set. This tuning involves a simple
        # grid-search on a small range of parameters and needs to be
        # called at least once at the very end of training
        self.partition_every_n_epoch = partition_every_n_epoch
        self.no_instance_metrics = no_instance_metrics
        self.no_instance_metrics_on_train_set = no_instance_metrics_on_train_set
        self.partitioner = partitioner

        # Store the stuff class indices
        self.stuff_classes = stuff_classes

        # Loss functions for edge affinity and node offset predictions.
        # NB: the semantic loss is already accounted for in the
        # SemanticSegmentationModule constructor
        self.edge_affinity_criterion = BCEWithLogitsLoss() \
            if edge_affinity_criterion is None else edge_affinity_criterion
        # self.node_offset_criterion = WeightedL2Loss() \
        #     if node_offset_criterion is None else node_offset_criterion

        # Model heads for edge affinity and node offset predictions
        # Initialize the model segmentation head (or heads)
        # out_dim = self.net.out_dim[0] if self.multi_stage_loss \
        #     else self.net.out_dim
        # self.edge_affinity_head = FFN(out_dim * 2, hidden_dim=32, out_dim=1)
        self.edge_affinity_head = edge_affinity_head
        # self.node_offset_head = FFN(out_dim, hidden_dim=32, out_dim=3)

        # Custom weight initialization. In particular, this applies
        # Xavier / Glorot initialization on Linear and RPE layers by
        # default, but can be tuned
        init = lambda m: init_weights(m, linear=init_linear, rpe=init_rpe)
        self.edge_affinity_head.apply(init)
        # self.node_offset_head.apply(init)

        # Metric objects for calculating panoptic segmentation scores on
        # each dataset split
        self.train_panoptic = PanopticQuality3D(
            self.num_classes,
            ignore_unseen_classes=True,
            stuff_classes=self.stuff_classes,
            compute_on_cpu=True,
            **kwargs)
        self.val_panoptic = PanopticQuality3D(
            self.num_classes,
            ignore_unseen_classes=True,
            stuff_classes=self.stuff_classes,
            compute_on_cpu=True,
            **kwargs)
        self.test_panoptic = PanopticQuality3D(
            self.num_classes,
            ignore_unseen_classes=True,
            stuff_classes=self.stuff_classes,
            compute_on_cpu=True,
            **kwargs)

        # Metric objects for calculating semantic segmentation scores on
        # predicted instances on each dataset split
        self.train_semantic = ConfusionMatrix(self.num_classes)
        self.val_semantic = ConfusionMatrix(self.num_classes)
        self.test_semantic = ConfusionMatrix(self.num_classes)

        # Metric objects for calculating instance segmentation scores on
        # each dataset split
        self.train_instance = MeanAveragePrecision3D(
            self.num_classes,
            stuff_classes=self.stuff_classes,
            min_size=min_instance_size,
            compute_on_cpu=True,
            remove_void=True,
            **kwargs)
        self.val_instance = MeanAveragePrecision3D(
            self.num_classes,
            stuff_classes=self.stuff_classes,
            min_size=min_instance_size,
            compute_on_cpu=True,
            remove_void=True,
            **kwargs)
        self.test_instance = MeanAveragePrecision3D(
            self.num_classes,
            stuff_classes=self.stuff_classes,
            min_size=min_instance_size,
            compute_on_cpu=True,
            remove_void=True,
            **kwargs)

        # Storage to accumulate multiple batch partition predictions, to
        # be used when searching for the best partition setting
        self.train_multi_partition_storage = []

        # Metric objects for calculating node offset prediction scores
        # on each dataset split
        # self.train_offset_wl2 = WeightedL2Error()
        # self.train_offset_wl1 = WeightedL1Error()
        # self.train_offset_l2 = L2Error()
        # self.train_offset_l1 = L1Error()
        # self.val_offset_wl2 = WeightedL2Error()
        # self.val_offset_wl1 = WeightedL1Error()
        # self.val_offset_l2 = L2Error()
        # self.val_offset_l1 = L1Error()
        # self.test_offset_wl2 = WeightedL2Error()
        # self.test_offset_wl1 = WeightedL1Error()
        # self.test_offset_l2 = L2Error()
        # self.test_offset_l1 = L1Error()

        # Metric objects for calculating edge affinity prediction scores
        # on each dataset split
        self.train_affinity_oa = BinaryAccuracy()
        self.train_affinity_f1 = BinaryF1Score()
        self.val_affinity_oa = BinaryAccuracy()
        self.val_affinity_f1 = BinaryF1Score()
        self.test_affinity_oa = BinaryAccuracy()
        self.test_affinity_f1 = BinaryF1Score()

        # For averaging losses across batches
        self.train_semantic_loss = MeanMetric()
        self.train_edge_affinity_loss = MeanMetric()
        # self.train_node_offset_loss = MeanMetric()
        self.val_semantic_loss = MeanMetric()
        self.val_edge_affinity_loss = MeanMetric()
        # self.val_node_offset_loss = MeanMetric()
        self.test_semantic_loss = MeanMetric()
        self.test_edge_affinity_loss = MeanMetric()
        # self.test_node_offset_loss = MeanMetric()

        # For tracking best-so-far validation metrics
        self.val_map_best = MaxMetric()
        self.val_pq_best = MaxMetric()
        self.val_pqmod_best = MaxMetric()
        self.val_mprec_best = MaxMetric()
        self.val_mrec_best = MaxMetric()
        self.val_instance_miou_best = MaxMetric()
        self.val_instance_oa_best = MaxMetric()
        self.val_instance_macc_best = MaxMetric()
        # self.val_offset_wl2_best = MinMetric()
        # self.val_offset_wl1_best = MinMetric()
        # self.val_offset_l2_best = MinMetric()
        # self.val_offset_l1_best = MinMetric()
        self.val_affinity_oa_best = MaxMetric()
        self.val_affinity_f1_best = MaxMetric()

    @property
    def needs_partition(self) -> bool:
        """Whether the `self.partitioner` should be called to compute
        the actual panoptic segmentation. During training, the actual
        partition is not really needed, as we do not learn to partition,
        but learn to predict inputs for the partition step instead. For
        this reason, we save compute and time during training by only
        computing the partition once in a while with
        `self.partition_every_n_epoch`.

        :return: True if no Trainer is attached, or if the Trainer is
            neither training nor validating. Otherwise, the decision
            depends on the current epoch, `partition_every_n_epoch`,
            and the Trainer's `max_epochs` / `check_val_every_n_epoch`
        """
        # Get the current epoch. For the validation set, we alter the
        # epoch number so that `partition_every_n_epoch` can align
        # with `check_val_every_n_epoch`. Indeed, it seems the epoch
        # number during the validation step is always one increment
        # ahead
        epoch = self.current_epoch + 1

        # If no Trainer attached to the model, run the partition
        if self._trainer is None:
            return True

        # Some useful checks to decide whether the partition should be
        # triggered. NB: `k <= 0` disables the periodic trigger, only
        # the last epoch can then trigger the partition
        k = self.partition_every_n_epoch
        last_epoch = epoch == self.trainer.max_epochs
        first_epoch = epoch == 1
        kth_epoch = epoch % k == 0 if k > 0 else False

        # For training, the partition is computed based on
        # `partition_every_n_epoch`, or if we reached the last epoch.
        # The first epoch will be skipped, because trained weights are
        # unlikely to produce interesting inputs for the partition
        if self.trainer.training:
            return (kth_epoch and not first_epoch) or last_epoch

        # For validation, we have the same behavior as training, with
        # the difference that if `check_val_every_n_epoch` is larger
        # than `partition_every_n_epoch`, we automatically trigger the
        # partition
        if self.trainer.validating:
            k_val = self.trainer.check_val_every_n_epoch
            nearest_multiple = epoch % k < k_val if k > 0 else False
            if 0 < k <= k_val:
                return not first_epoch or last_epoch
            else:
                return (nearest_multiple and not first_epoch) or last_epoch

        # For all other Trainer stages, we run the partition by default
        return True

    @property
    def needs_instance(self) -> bool:
        """Returns True if the instance segmentation metrics need to be
        computed. In particular, since computing instance metrics can be
        computationally costly, we may want to skip it during training
        by setting `no_instance_metrics_on_train_set=True`, or all the
        time by setting `no_instance_metrics=True`.

        :return: False if instance metrics are disabled for the current
            stage, `self.needs_partition` otherwise
        """
        if self.no_instance_metrics:
            return False

        if self._trainer is None:
            return self.needs_partition

        if self.trainer.training and self.no_instance_metrics_on_train_set:
            return False

        return self.needs_partition

    def forward(
            self,
            nag: NAG,
            grid: Any = None
    ) -> PanopticSegmentationOutput:
        """Predict semantic logits and edge affinity logits for the
        level-1 nodes of `nag` and, if required, the panoptic partition.

        :param nag: NAG object
        :param grid: Any
            Forwarded as-is to `_forward_partition()`, which passes it
            to `self.partitioner` as `grid`
        :return: PanopticSegmentationOutput
        """
        # Extract features
        x = self.net(nag)

        # Compute level-1 or multi-level semantic predictions
        semantic_pred = [head(x_) for head, x_ in zip(self.head, x)] \
            if self.multi_stage_loss else self.head(x)

        # Recover level-1 features only
        x = x[0] if self.multi_stage_loss else x

        # Compute node offset predictions
        # node_offset_pred = self.node_offset_head(x)

        # # Forcefully set 0-offset for nodes with stuff predictions
        # node_logits = semantic_pred[0] if self.multi_stage_loss \
        #     else semantic_pred
        # is_stuff = get_stuff_mask(node_logits, self.stuff_classes)
        # node_offset_pred[is_stuff] = 0

        # TODO: OPTIONALLY REMOVE OFFSET
        # node_offset_pred = node_offset_pred * 0

        # TODO: offset soft-assigned to 0 based on the predicted
        #  stuff/thing probas. A stuff/thing classification loss could
        #  provide additional supervision

        # Compute edge affinity predictions
        # NB: we make edge features symmetric, since we want to compute
        # edge affinity, which is not directed
        x_edge = x[nag[1].obj_edge_index]
        x_edge = torch.cat(
            ((x_edge[0] - x_edge[1]).abs(), (x_edge[0] + x_edge[1]) / 2),
            dim=1)
        norm_index = torch.zeros(
            x_edge.shape[0], device=x_edge.device, dtype=torch.long)

        # NB: only the trailing dimension is squeezed. An unqualified
        # `squeeze()` would also drop the edge dimension when the graph
        # holds a single edge, producing a 0-dim tensor. Assumes the
        # head outputs one value per edge, i.e. shape (num_edges, 1)
        # -- TODO confirm against the head in use
        edge_affinity_logits = self.edge_affinity_head(
            x_edge, batch=norm_index).squeeze(-1)

        # Gather results in an output object
        output = PanopticSegmentationOutput(
            semantic_pred,
            self.stuff_classes,
            edge_affinity_logits,
            # node_offset_pred,
            nag.get_sub_size(1))

        # Compute the panoptic partition
        output = self._forward_partition(nag, output, grid=grid)

        return output

    def _forward_partition(
            self,
            nag: NAG,
            output: PanopticSegmentationOutput,
            grid: Any = None,
            force: bool = False
    ) -> PanopticSegmentationOutput:
        """Compute the panoptic partition based on the predicted node
        offsets, node semantic logits, and edge affinity logits.

        The partition will only be computed if required. In general,
        during training, the actual partition is not needed for the
        model to be supervised. We only run it once in a while to
        evaluate the panoptic/instance segmentation metrics or tune
        the partition hyperparameters on the train set.

        :param nag: NAG object
        :param output: PanopticSegmentationOutput
        :param grid: Dict
            A dictionary containing settings for grid-searching optimal
            partition parameters
        :param force: bool
            Whether to forcefully compute the partition, regardless of
            `self.needs_partition`. This mechanism is typically needed
            during training when we want to store or log predictions
            for a batch of interest at an epoch when
            `self.needs_partition` is False
        :return: output, with `obj_index_pred` set if the partition was
            computed
        """
        if not self.needs_partition and not force:
            return output

        # Recover some useful information from the NAG and
        # PanopticSegmentationOutput objects
        batch = nag[1].batch
        # node_x = nag[1].pos + output.node_offset_pred
        node_x = nag[1].pos
        node_size = nag.get_sub_size(1)
        node_logits = output.logits[0] if output.multi_stage else output.logits
        edge_index = nag[1].obj_edge_index
        edge_affinity_logits = output.edge_affinity_logits

        # Compute the instance partition
        # NB: we detach the tensors here: this operation runs on CPU and
        # is non-differentiable
        obj_index = self.partitioner(
            batch,
            node_x.detach(),
            node_logits.detach(),
            self.stuff_classes,
            node_size,
            edge_index,
            edge_affinity_logits.detach(),
            grid=grid)

        # Store the results in the output object
        output.obj_index_pred = obj_index

        return output

    def on_fit_start(self) -> None:
        """Check that the stuff classes of the module match those of
        the train dataset of the attached LightningDataModule.

        :raises AssertionError: if the two sets of stuff classes differ
        """
        super().on_fit_start()

        # Get the LightningDataModule stuff classes and make sure it
        # matches self.stuff_classes. We could also forcefully update
        # the LightningModule with this new information, but it could
        # easily become tedious to track all places where stuff_classes
        # affects the LightningModule object.
        stuff_classes = self.trainer.datamodule.train_dataset.stuff_classes
        assert sorted(stuff_classes) == sorted(self.stuff_classes), \
            f'LightningModule has the following stuff classes ' \
            f'{self.stuff_classes} while the LightningDataModule has ' \
            f'{stuff_classes}.'
def on_train_start(self) -> None: # By default, lightning executes validation step sanity checks # before training starts, so we need to make sure `*_best` # metrics do not store anything from these checks super().on_train_start() self.val_panoptic.reset() self.val_semantic.reset() self.val_instance.reset() # self.val_offset_wl2.reset() # self.val_offset_wl1.reset() # self.val_offset_l2.reset() # self.val_offset_l1.reset() self.val_affinity_oa.reset() self.val_affinity_f1.reset() self.val_map_best.reset() self.val_pq_best.reset() self.val_pqmod_best.reset() self.val_mprec_best.reset() self.val_mrec_best.reset() self.val_instance_miou_best.reset() self.val_instance_oa_best.reset() self.val_instance_macc_best.reset() # self.val_offset_wl2_best.reset() # self.val_offset_wl1_best.reset() # self.val_offset_l2_best.reset() # self.val_offset_l1_best.reset() self.val_affinity_oa_best.reset() self.val_affinity_f1_best.reset() self.train_multi_partition_storage = [] def _create_empty_output(self, nag: NAG) -> PanopticSegmentationOutput: """Local helper method to initialize an empty output for multi-run prediction. """ # Prepare empty output for semantic segmentation output_semseg = super()._create_empty_output(nag) # Prepare empty edge affinity and node offset outputs num_edges = nag[1].obj_edge_index.shape[1] edge_affinity_logits = torch.zeros(num_edges, device=nag.device) # node_offset_pred = torch.zeros_like(nag[1].pos) node_size = nag.get_sub_size(1) return PanopticSegmentationOutput( output_semseg.logits, self.stuff_classes, edge_affinity_logits, # node_offset_pred, node_size) @staticmethod def _update_output_multi( output_multi: PanopticSegmentationOutput, nag: NAG, output: PanopticSegmentationOutput, nag_transformed: NAG, key: str ) -> PanopticSegmentationOutput: """Local helper method to accumulate multiple predictions on the same--or part of the same--point cloud. 
""" raise NotImplementedError( "The current implementation does not properly support multi-run " "for instance/panoptic segmentation") # Update semantic segmentation logits only output_multi = super()._update_output_multi( output_multi, nag, output, nag_transformed, key) # Update node-wise predictions # TODO: this is INCORRECT accumulation of node offsets. Need to # define the mean, not the mean of the successive predictions node_id = nag_transformed[1][key] output_multi.node_offset_pred[node_id] = \ (output_multi.node_offset_pred[node_id] + output.node_offset_pred) / 2 # Update edge-wise predictions edge_index_1 = nag[1].obj_edge_index edge_index_2 = node_id[nag_transformed[1].obj_edge_index] base = nag[1].num_points + 1 edge_id_1 = edge_index_1[0] * base + edge_index_1[1] edge_id_2 = edge_index_2[0] * base + edge_index_2[1] edge_id_cat = consecutive_cluster(torch.cat((edge_id_1, edge_id_2)))[0] edge_id_1 = edge_id_cat[:edge_id_1.numel()] edge_id_2 = edge_id_cat[edge_id_1.numel():] pivot = torch.zeros(base ** 2, device=output.edge_affinity_logits) pivot[edge_id_1] = output_multi.edge_affinity_logits # TODO: this is INCORRECT accumulation of node offsets. Need to # define the mean, not the mean of the successive predictions pivot[edge_id_2] = (pivot[edge_id_2] + output.edge_affinity_logits) / 2 output_multi.edge_affinity_logits = pivot[edge_id_1] return output_multi @staticmethod def _propagate_output_to_unseen_neighbors( output: PanopticSegmentationOutput, nag: NAG, seen: torch.Tensor, neighbors: torch.Tensor ) -> PanopticSegmentationOutput: """Local helper method to propagate predictions to unseen neighbors. 
""" # Propagate semantic segmentation to neighbors output = super()._propagate_output_to_unseen_neighbors( output, nag, seen, neighbors) # Heuristic for unseen node offsets: unseen nodes take the same # offset as their nearest neighbor seen_idx = torch.where(seen)[0] unseen_idx = torch.where(~seen)[0] output.node_offset_pred[unseen_idx] = \ output.node_offset_pred[seen_idx][neighbors] # Heuristic for unseen edge affinity predictions: we set the # edge affinity to 0.5 seen_edge = nag[1].obj_edge_index[seen] unseen_edge_idx = torch.where(~seen_edge)[0] output.edge_affinity_logits[unseen_edge_idx] = 0.5 return output def get_target( self, nag: NAG, output: PanopticSegmentationOutput ) -> PanopticSegmentationOutput: """Recover the target data for semantic and panoptic segmentation and store it in the `output` object. More specifically: - label histogram(s) for semantic segmentation will be saved in `output.y_hist` - instance graph data `obj_edge_index` and `obj_edge_affinity` will be saved in `output.obj_edge_index` and `output.obj_edge_affinity`, respectively - node positions `pos` and `obj_pos` will be saved in `output.pos` and `output.obj_pos`, respectively. Besides, the `output.obj_offset` will carry the target offset, computed from those """ # Recover targets for semantic segmentation output = super().get_target(nag, output) # Recover targets for instance/panoptic segmentation output.obj_edge_index = getattr(nag[1], 'obj_edge_index', None) output.obj_edge_affinity = getattr(nag[1], 'obj_edge_affinity', None) output.pos = nag[1].pos output.obj_pos = getattr(nag[1], 'obj_pos', None) output.obj = nag[1].obj return output def _edge_affinity_weights( self, is_same_class: torch.Tensor, is_same_obj: torch.Tensor ) -> torch.Tensor: """Helper function to compute edge weights to be used by the edge affinity loss. Each edge may have a different weight, based on whether its source and target nodes have the same class or belong to the same object. 
The weight given to each case (same-class and same-object, same-class and different object, etc..) is specified in `edge_affinity_loss_weights`. :param is_same_class: BoolTensor Mask indicating edges between nodes of the same semantic class :param is_same_obj: BoolTensor Mask indicating edges between nodes of the same object """ # Recover the weights given to each case w = self.hparams.edge_affinity_loss_weights # If edge_affinity_loss_weights was not specified, no weighting # scheme will be applied to the edges if w is None or not len(w) == 4: return None # Compute the weight for each edge edge_weight = torch.ones_like(is_same_class).float() edge_weight[is_same_class * is_same_obj] = w[0] edge_weight[is_same_class * ~is_same_obj] = w[1] edge_weight[~is_same_class * is_same_obj] = w[2] edge_weight[~is_same_class * ~is_same_obj] = w[3] return edge_weight def model_step( self, batch: NAG ) -> Tuple[torch.Tensor, PanopticSegmentationOutput]: # Loss and predictions for semantic segmentation semantic_loss, output = super().model_step(batch) # Cannot compute losses if some target data are missing if not output.has_target: return None, output # Compute the node offset loss, weighted by the node size # node_offset_loss = self.node_offset_criterion( # *output.sanitized_node_offsets) # Compute the edge affinity loss edge_affinity_pred, edge_affinity_target, is_same_class, is_same_obj = \ output.sanitized_edge_affinities() edge_weight = self._edge_affinity_weights(is_same_class, is_same_obj) edge_affinity_loss = self.edge_affinity_criterion( edge_affinity_pred, edge_affinity_target, edge_weight) # Combine the losses together # TODO: remove node offset cleanly # loss = semantic_loss \ # + self.hparams.edge_affinity_loss_lambda * edge_affinity_loss \ # + self.hparams.node_offset_loss_lambda * node_offset_loss loss = semantic_loss \ + self.hparams.edge_affinity_loss_lambda * edge_affinity_loss # Save individual losses in the output object output.semantic_loss = semantic_loss # 
TODO: remove node offset cleanly # output.node_offset_loss = 0 output.edge_affinity_loss = edge_affinity_loss return loss, output def train_step_update_metrics( self, loss: torch.Tensor, output: PanopticSegmentationOutput ) -> None: """Update train metrics with the content of the output object. """ # Update semantic segmentation metrics super().train_step_update_metrics(loss, output) # Update instance and panoptic metrics if self.needs_partition and not output.has_multi_instance_pred: obj_score, obj_y, instance_data = output.panoptic_pred() obj_score = obj_score.detach().cpu() obj_y = obj_y.detach() obj_hist = instance_data.target_label_histogram(self.num_classes) self.train_panoptic.update(obj_y.cpu(), instance_data.cpu()) self.train_semantic(obj_y, obj_hist) if self.needs_instance: self.train_instance.update(obj_score, obj_y, instance_data.cpu()) elif self.needs_partition: logits = output.logits[0] if output.multi_stage else output.logits storage = PartitionParameterSearchStorage( logits.detach().cpu(), self.stuff_classes, output.node_size.detach().cpu(), output.edge_affinity_logits.detach().cpu(), output.obj.cpu(), [(v[0], v[1].detach().cpu()) for v in output.obj_index_pred]) self.train_multi_partition_storage.append(storage) # Update tracked losses self.train_semantic_loss(output.semantic_loss.detach()) # self.train_node_offset_loss(output.node_offset_loss.detach()) self.train_edge_affinity_loss(output.edge_affinity_loss.detach()) # Update node offset metrics # node_offset_pred, node_offset, node_size = output.sanitized_node_offsets # node_offset_pred = node_offset_pred.detach() # node_offset = node_offset.detach() # node_size = node_size.detach() # self.train_offset_wl2(node_offset_pred, node_offset, node_size) # self.train_offset_wl1(node_offset_pred, node_offset, node_size) # self.train_offset_l2(node_offset_pred, node_offset) # self.train_offset_l1(node_offset_pred, node_offset) # Update edge affinity metrics ea_pred, ea_target, is_same_class, is_same_obj = 
\ output.sanitized_edge_affinities() ea_pred = ea_pred.detach() ea_target_binary = (ea_target.detach() > 0.5).long() self.train_affinity_oa(ea_pred, ea_target_binary) self.train_affinity_f1(ea_pred, ea_target_binary) def train_step_log_metrics(self) -> None: """Log train metrics after a single step with the content of the output object. """ super().train_step_log_metrics() self.log( "train/semantic_loss", self.train_semantic_loss, on_step=False, on_epoch=True, prog_bar=True) # self.log( # "train/node_offset_loss", self.train_node_offset_loss, on_step=False, # on_epoch=True, prog_bar=True) self.log( "train/edge_affinity_loss", self.train_edge_affinity_loss, on_step=False, on_epoch=True, prog_bar=True) def on_train_epoch_end(self) -> None: # Log semantic segmentation metrics and reset confusion matrix super().on_train_epoch_end() # TODO: support logging panoptic metrics for DDP if self.trainer.num_devices > 1: log.warning( "Panoptic and instance segmentation metrics are not guaranteed " "to be well-behaved on DDP yet.") if self.needs_partition: # If multiple partitions settings were tested during the # epoch, this will search for the best one, update the # internal states of train metrics with related predictions, # and update the partitioner's settings setting = self._compute_best_partition_settings()[0] # Compute the instance and panoptic metrics panoptic_results = self.train_panoptic.compute() if self.needs_instance: instance_results = self.train_instance.compute() # Gather tracked metrics pq = panoptic_results.pq sq = panoptic_results.sq rq = panoptic_results.rq pq_thing = panoptic_results.pq_thing pq_stuff = panoptic_results.pq_stuff pqmod = panoptic_results.pq_modified mprec = panoptic_results.mean_precision mrec = panoptic_results.mean_recall pq_per_class = panoptic_results.pq_per_class if self.needs_instance: map = instance_results.map map_50 = instance_results.map_50 map_75 = instance_results.map_75 map_per_class = instance_results.map_per_class # Log 
metrics self.log("train/pq", 100 * pq, prog_bar=True) self.log("train/sq", 100 * sq, prog_bar=True) self.log("train/rq", 100 * rq, prog_bar=True) self.log("train/pq_thing", 100 * pq_thing, prog_bar=True) self.log("train/pq_stuff", 100 * pq_stuff, prog_bar=True) self.log("train/pqmod", 100 * pqmod, prog_bar=True) self.log("train/mprec", 100 * mprec, prog_bar=True) self.log("train/mrec", 100 * mrec, prog_bar=True) self.log("train/instance_miou", self.train_semantic.miou(), prog_bar=True) self.log("train/instance_oa", self.train_semantic.oa(), prog_bar=True) self.log("train/instance_macc", self.train_semantic.macc(), prog_bar=True) for iou, seen, name in zip(*self.train_semantic.iou(), self.class_names): if seen: self.log(f"train/instance_iou_{name}", iou, prog_bar=True) if self.needs_instance: self.log("train/map", 100 * map, prog_bar=True) self.log("train/map_50", 100 * map_50, prog_bar=True) self.log("train/map_75", 100 * map_75, prog_bar=True) for pq_c, name in zip(pq_per_class, self.class_names): self.log(f"train/pq_{name}", 100 * pq_c, prog_bar=True) if self.needs_instance: for map_c, name in zip(map_per_class, self.class_names): self.log(f"train/map_{name}", 100 * map_c, prog_bar=True) if setting is not None: for k, v in setting.items(): self.log(f"partition_settings/{k}", v, prog_bar=True) # Log metrics # self.log("train/offset_wl2", self.train_offset_wl2.compute(), prog_bar=True) # self.log("train/offset_wl1", self.train_offset_wl1.compute(), prog_bar=True) # self.log("train/offset_l2", self.train_offset_l2.compute(), prog_bar=True) # self.log("train/offset_l1", self.train_offset_l1.compute(), prog_bar=True) self.log("train/affinity_oa", 100 * self.train_affinity_oa.compute(), prog_bar=True) self.log("train/affinity_f1", 100 * self.train_affinity_f1.compute(), prog_bar=True) # Reset metrics accumulated over the last epoch # self.train_offset_wl2.reset() # self.train_offset_wl1.reset() # self.train_offset_l2.reset() # self.train_offset_l1.reset() 
self.train_affinity_oa.reset() self.train_affinity_f1.reset() self.train_panoptic.reset() self.train_semantic.reset() self.train_instance.reset() def _compute_best_partition_settings( self, monitor: str = 'pq', maximize: bool = True ) -> Tuple[Dict, float]: """Compute the best partition settings from `self.train_multi_partition_storage`. This will have the following internal effects: - `self.partitioner` will be updated with the settings which produced the best metrics on the epoch - `self.train_panoptic` will be updated with the batch predictions with the best settings - `self.train_instance` will be updated with the batch predictions with the best settings, if required :param monitor: str The metric based on which we will select the best settings :param maximize: bool Whether the monitored metric should be maximized or minimized :return: """ # Nothing happens if multi-partition was not activated during # the epoch if len(self.train_multi_partition_storage) == 0: return None, None # Reset the instance and panoptic metrics, these will be used to # compute metric performance self.train_panoptic.reset() self.train_instance.reset() # Check whether the metric to monitor is for the semantic or # panoptic segmentation task if monitor in self.train_panoptic.__slots__: task = 'panoptic' meter = self.train_panoptic elif monitor in self.train_instance.__slots__: task = 'instance' meter = self.train_instance else: raise ValueError(f"Unknown metric, cannot monitor '{monitor}'.") if task == 'instance' and not self.needs_instance: raise ValueError( 'Cannot compute the best partition settings on the train set ' 'based on instance metrics if `self.needs_instance` is False') # Recover from the first PartitionParameterSearchStorage, which # settings were explored settings = self.train_multi_partition_storage[0].settings # Compute the metric for each partition setting while tracking # the best setting best_metric = -torch.inf if maximize else torch.inf best_setting = None for s in 
settings: # Accumulate batch predictions in the meter for storage in self.train_multi_partition_storage: obj_score, obj_y, instance_data = \ storage.panoptic_pred(s) if task == 'panoptic': meter.update(obj_y, instance_data) else: meter.update(obj_score, obj_y, instance_data) # Compute the monitored metric on the whole epoch metric = getattr(meter.compute(), monitor) # Update the best metric and settings condition = (metric > best_metric) if maximize \ else (metric < best_setting) if condition: best_metric = metric best_setting = s # Reset the meter to avoid mixing predictions of different # settings meter.reset() # Update the partitioner with the best metrics for k, v in best_setting.items(): setattr(self.partitioner, k, v) # Update the train meters with the data for computation of # logged metrics with the accumulated data from the best # setting, thus mimicking a normal epoch with a single partition # prediction per batch for storage in self.train_multi_partition_storage: obj_score, obj_y, instance_data = \ storage.panoptic_pred(best_setting) obj_hist = instance_data.target_label_histogram(self.num_classes) self.train_panoptic.update(obj_y, instance_data) self.train_semantic( obj_y.to(self.train_semantic.device), obj_hist.to(self.train_semantic.device)) if self.needs_instance: self.train_instance.update(obj_score, obj_y, instance_data) return best_setting, best_metric def validation_step_update_metrics( self, loss: torch.Tensor, output: PanopticSegmentationOutput ) -> None: """Update validation metrics with the content of the output object. 
""" # Update semantic segmentation metrics super().validation_step_update_metrics(loss, output) # Update instance and panoptic metrics if self.needs_partition: obj_score, obj_y, instance_data = output.panoptic_pred() obj_score = obj_score.detach().cpu() obj_y = obj_y.detach() obj_hist = instance_data.target_label_histogram(self.num_classes) self.val_panoptic.update(obj_y.cpu(), instance_data.cpu()) self.val_semantic(obj_y, obj_hist) if self.needs_instance: self.val_instance.update(obj_score, obj_y, instance_data.cpu()) # Update tracked losses self.val_semantic_loss(output.semantic_loss.detach()) # self.val_node_offset_loss(output.node_offset_loss.detach()) self.val_edge_affinity_loss(output.edge_affinity_loss.detach()) # Update node offset metrics # node_offset_pred, node_offset, node_size = output.sanitized_node_offsets # node_offset_pred = node_offset_pred.detach() # node_offset = node_offset.detach() # node_size = node_size.detach() # self.val_offset_wl2(node_offset_pred, node_offset, node_size) # self.val_offset_wl1(node_offset_pred, node_offset, node_size) # self.val_offset_l2(node_offset_pred, node_offset) # self.val_offset_l1(node_offset_pred, node_offset) # Update edge affinity metrics ea_pred, ea_target, is_same_class, is_same_obj = \ output.sanitized_edge_affinities() ea_pred = ea_pred.detach() ea_target_binary = (ea_target.detach() > 0.5).long() self.val_affinity_oa(ea_pred, ea_target_binary) self.val_affinity_f1(ea_pred, ea_target_binary) def validation_step_log_metrics(self) -> None: """Log validation metrics after a single step with the content of the output object. 
""" super().validation_step_log_metrics() self.log( "val/semantic_loss", self.val_semantic_loss, on_step=False, on_epoch=True, prog_bar=True) # self.log( # "val/node_offset_loss", self.val_node_offset_loss, on_step=False, # on_epoch=True, prog_bar=True) self.log( "val/edge_affinity_loss", self.val_edge_affinity_loss, on_step=False, on_epoch=True, prog_bar=True) def on_validation_epoch_end(self) -> None: # Log semantic segmentation metrics and reset confusion matrix super().on_validation_epoch_end() # TODO: support logging panoptic metrics for DDP if self.trainer.num_devices > 1: log.warning( "Panoptic and instance segmentation metrics are not guaranteed " "to be well-behaved on DDP yet.") if self.needs_partition: # Compute the instance and panoptic metrics panoptic_results = self.val_panoptic.compute() if self.needs_instance: instance_results = self.val_instance.compute() # Gather tracked metrics pq = panoptic_results.pq sq = panoptic_results.sq rq = panoptic_results.rq pq_thing = panoptic_results.pq_thing pq_stuff = panoptic_results.pq_stuff pqmod = panoptic_results.pq_modified mprec = panoptic_results.mean_precision mrec = panoptic_results.mean_recall pq_per_class = panoptic_results.pq_per_class if self.needs_instance: map = instance_results.map map_50 = instance_results.map_50 map_75 = instance_results.map_75 map_per_class = instance_results.map_per_class # Log metrics self.log("val/pq", 100 * pq, prog_bar=True) self.log("val/sq", 100 * sq, prog_bar=True) self.log("val/rq", 100 * rq, prog_bar=True) self.log("val/pq_thing", 100 * pq_thing, prog_bar=True) self.log("val/pq_stuff", 100 * pq_stuff, prog_bar=True) self.log("val/pqmod", 100 * pqmod, prog_bar=True) self.log("val/mprec", 100 * mprec, prog_bar=True) self.log("val/mrec", 100 * mrec, prog_bar=True) instance_miou = self.val_semantic.miou() instance_oa = self.val_semantic.oa() instance_macc = self.val_semantic.macc() self.log("val/instance_miou", instance_miou, prog_bar=True) self.log("val/instance_oa", 
instance_oa, prog_bar=True) self.log("val/instance_macc", instance_macc, prog_bar=True) for iou, seen, name in zip(*self.val_semantic.iou(), self.class_names): if seen: self.log(f"val/instance_iou_{name}", iou, prog_bar=True) if self.needs_instance: self.log("val/map", 100 * map, prog_bar=True) self.log("val/map_50", 100 * map_50, prog_bar=True) self.log("val/map_75", 100 * map_75, prog_bar=True) for pq_c, name in zip(pq_per_class, self.class_names): self.log(f"val/pq_{name}", 100 * pq_c, prog_bar=True) if self.needs_instance: for map_c, name in zip(map_per_class, self.class_names): self.log(f"val/map_{name}", 100 * map_c, prog_bar=True) # Update best-so-far metrics self.val_pq_best(pq) self.val_pqmod_best(pqmod) self.val_mprec_best(mprec) self.val_mrec_best(mrec) if self.needs_instance: self.val_map_best(map) self.val_instance_miou_best(instance_miou) self.val_instance_oa_best(instance_oa) self.val_instance_macc_best(instance_macc) # Log best-so-far metrics, using `.compute()` instead of passing # the whole torchmetrics object, because otherwise metric would # be reset by lightning after each epoch self.log("val/pq_best", 100 * self.val_pq_best.compute(), prog_bar=True) self.log("val/pqmod_best", 100 * self.val_pqmod_best.compute(), prog_bar=True) self.log("val/mprec_best", 100 * self.val_mprec_best.compute(), prog_bar=True) self.log("val/mrec_best", 100 * self.val_mrec_best.compute(), prog_bar=True) if self.needs_instance: self.log("val/map_best", 100 * self.val_map_best.compute(), prog_bar=True) self.log("val/instance_miou_best", self.val_instance_miou_best.compute(), prog_bar=True) self.log("val/instance_oa_best", self.val_instance_oa_best.compute(), prog_bar=True) self.log("val/instance_macc_best", self.val_instance_macc_best.compute(), prog_bar=True) # Compute the metrics tracked for model selection on validation # offset_wl2 = self.val_offset_wl2.compute() # offset_wl1 = self.val_offset_wl1.compute() # offset_l2 = self.val_offset_l2.compute() # offset_l1 = 
self.val_offset_l1.compute() affinity_oa = self.val_affinity_oa.compute() affinity_f1 = self.val_affinity_f1.compute() # Log metrics # self.log("val/offset_wl2", offset_wl2, prog_bar=True) # self.log("val/offset_wl1", offset_wl1, prog_bar=True) # self.log("val/offset_l2", offset_l2, prog_bar=True) # self.log("val/offset_l1", offset_l1, prog_bar=True) self.log("val/affinity_oa", 100 * affinity_oa, prog_bar=True) self.log("val/affinity_f1", 100 * affinity_f1, prog_bar=True) # Update best-so-far metrics # self.val_offset_wl2_best(offset_wl2) # self.val_offset_wl1_best(offset_wl1) # self.val_offset_l2_best(offset_l2) # self.val_offset_l1_best(offset_l1) self.val_affinity_oa_best(affinity_oa) self.val_affinity_f1_best(affinity_f1) # Log best-so-far metrics, using `.compute()` instead of passing # the whole torchmetrics object, because otherwise metric would # be reset by lightning after each epoch # self.log("val/offset_wl2_best", self.val_offset_wl2_best.compute(), prog_bar=True) # self.log("val/offset_wl1_best", self.val_offset_wl1_best.compute(), prog_bar=True) # self.log("val/offset_l2_best", self.val_offset_l2_best.compute(), prog_bar=True) # self.log("val/offset_l1_best", self.val_offset_l1_best.compute(), prog_bar=True) self.log("val/affinity_oa_best", 100 * self.val_affinity_oa_best.compute(), prog_bar=True) self.log("val/affinity_f1_best", 100 * self.val_affinity_f1_best.compute(), prog_bar=True) # Reset metrics accumulated over the last epoch # self.val_offset_wl2.reset() # self.val_offset_wl1.reset() # self.val_offset_l2.reset() # self.val_offset_l1.reset() self.val_affinity_oa.reset() self.val_affinity_f1.reset() self.val_panoptic.reset() self.val_semantic.reset() self.val_instance.reset() def test_step_update_metrics( self, loss: torch.Tensor, output: PanopticSegmentationOutput ) -> None: """Update test metrics with the content of the output object. 
""" # Update semantic segmentation metrics super().test_step_update_metrics(loss, output) # If the test set misses targets, we keep track of it, to skip # metrics computation on the test set if not self.test_has_target: return # Update instance and panoptic metrics if self.needs_partition: obj_score, obj_y, instance_data = output.panoptic_pred() obj_score = obj_score.detach().cpu() obj_y = obj_y.detach() obj_hist = instance_data.target_label_histogram(self.num_classes) self.test_panoptic.update(obj_y.cpu(), instance_data.cpu()) self.test_semantic(obj_y, obj_hist) if self.needs_instance: self.test_instance.update(obj_score, obj_y, instance_data.cpu()) # Update tracked losses self.test_semantic_loss(output.semantic_loss.detach()) # self.test_node_offset_loss(output.node_offset_loss.detach()) self.test_edge_affinity_loss(output.edge_affinity_loss.detach()) # Update node offset metrics # node_offset_pred, node_offset, node_size = output.sanitized_node_offsets # node_offset_pred = node_offset_pred.detach() # node_offset = node_offset.detach() # node_size = node_size.detach() # self.test_offset_wl2(node_offset_pred, node_offset, node_size) # self.test_offset_wl1(node_offset_pred, node_offset, node_size) # self.test_offset_l2(node_offset_pred, node_offset) # self.test_offset_l1(node_offset_pred, node_offset) # Update edge affinity metrics ea_pred, ea_target, is_same_class, is_same_obj = \ output.sanitized_edge_affinities() ea_pred = ea_pred.detach() ea_target_binary = (ea_target.detach() > 0.5).long() self.test_affinity_oa(ea_pred, ea_target_binary) self.test_affinity_f1(ea_pred, ea_target_binary) def test_step_log_metrics(self) -> None: """Log test metrics after a single step with the content of the output object. 
""" super().test_step_log_metrics() # If the test set misses targets, we keep track of it, to skip # metrics computation on the test set if not self.test_has_target: return self.log( "test/semantic_loss", self.test_semantic_loss, on_step=False, on_epoch=True, prog_bar=True) # self.log( # "test/node_offset_loss", self.test_node_offset_loss, on_step=False, # on_epoch=True, prog_bar=True) self.log( "test/edge_affinity_loss", self.test_edge_affinity_loss, on_step=False, on_epoch=True, prog_bar=True) def on_test_epoch_end(self) -> None: # Log semantic segmentation metrics and reset confusion matrix super().on_test_epoch_end() # If test set misses target data, reset metrics and skip logging if not self.test_has_target: # self.test_offset_wl2.reset() # self.test_offset_wl1.reset() # self.test_offset_l2.reset() # self.test_offset_l1.reset() self.test_affinity_oa.reset() self.test_affinity_f1.reset() self.test_panoptic.reset() self.test_semantic.reset() self.test_instance.reset() return # TODO: support logging panoptic metrics for DDP if self.trainer.num_devices > 1: log.warning( "Panoptic and instance segmentation metrics are not guaranteed " "to be well-behaved on DDP yet.") if self.needs_partition: # Compute the instance and panoptic metrics panoptic_results = self.test_panoptic.compute() if self.needs_instance: instance_results = self.test_instance.compute() # Gather tracked metrics pq = panoptic_results.pq sq = panoptic_results.sq rq = panoptic_results.rq pq_thing = panoptic_results.pq_thing pq_stuff = panoptic_results.pq_stuff pqmod = panoptic_results.pq_modified mprec = panoptic_results.mean_precision mrec = panoptic_results.mean_recall pq_per_class = panoptic_results.pq_per_class if self.needs_instance: map = instance_results.map map_50 = instance_results.map_50 map_75 = instance_results.map_75 map_per_class = instance_results.map_per_class # Log metrics self.log("test/pq", 100 * pq, prog_bar=True) self.log("test/sq", 100 * sq, prog_bar=True) self.log("test/rq", 100 
* rq, prog_bar=True) self.log("test/pq_thing", 100 * pq_thing, prog_bar=True) self.log("test/pq_stuff", 100 * pq_stuff, prog_bar=True) self.log("test/pqmod", 100 * pqmod, prog_bar=True) self.log("test/mprec", 100 * mprec, prog_bar=True) self.log("test/mrec", 100 * mrec, prog_bar=True) self.log("test/instance_miou", self.test_semantic.miou(), prog_bar=True) self.log("test/instance_oa", self.test_semantic.oa(), prog_bar=True) self.log("test/instance_macc", self.test_semantic.macc(), prog_bar=True) for iou, seen, name in zip(*self.test_semantic.iou(), self.class_names): if seen: self.log(f"test/instance_iou_{name}", iou, prog_bar=True) if self.needs_instance: self.log("test/map", 100 * map, prog_bar=True) self.log("test/map_50", 100 * map_50, prog_bar=True) self.log("test/map_75", 100 * map_75, prog_bar=True) for pq_c, name in zip(pq_per_class, self.class_names): self.log(f"test/pq_{name}", 100 * pq_c, prog_bar=True) if self.needs_instance: for map_c, name in zip(map_per_class, self.class_names): self.log(f"test/map_{name}", 100 * map_c, prog_bar=True) # Log metrics # self.log("test/offset_wl2", self.test_offset_wl2.compute(), prog_bar=True) # self.log("test/offset_wl1", self.test_offset_wl1.compute(), prog_bar=True) # self.log("test/offset_l2", self.test_offset_l2.compute(), prog_bar=True) # self.log("test/offset_l1", self.test_offset_l1.compute(), prog_bar=True) self.log("test/affinity_oa", 100 * self.test_affinity_oa.compute(), prog_bar=True) self.log("test/affinity_f1", 100 * self.test_affinity_f1.compute(), prog_bar=True) # Reset metrics accumulated over the last epoch # self.test_offset_wl2.reset() # self.test_offset_wl1.reset() # self.test_offset_l2.reset() # self.test_offset_l1.reset() self.test_affinity_oa.reset() self.test_affinity_f1.reset() self.test_panoptic.reset() self.test_semantic.reset() self.test_instance.reset() def track_batch( self, batch: NAG, batch_idx: int, output: PanopticSegmentationOutput, folder: str = None ) -> None: """Store a batch 
prediction to disk. The corresponding `NAG` object will be populated with panoptic segmentation predictions for: - levels 1+ if `multi_stage` output (i.e. loss supervision on levels 1 and above) - only level 1 otherwise Besides, we also pre-compute the level-0 predictions as this is frequently required for downstream tasks. However, we choose not to compute the full-resolution predictions for the sake of disk memory. If a `folder` is provided, the NAG will be saved there under: /predictions///batch_.h5 If not, the folder will be the logger's directory, if any. If not, the current working directory will be used. :param batch: NAG Object that will be stored to disk. Before that, the model predictions will be added to the attributes of each level, to facilitate downstream use of the stored `NAG` :param batch_idx: int Index of the batch to be stored :param output: PanopticSegmentationOutput Output of `self.model_step()` :param folder: str Path where to save the tracked batch. If not provided, the logger's saving directory will be used as fallback. If not logger is found, the current working directory will be used :return: """ # Sanity check in case using multi-run inference if not isinstance(batch, NAG): raise NotImplementedError( f"Expected as NAG, but received a {type(batch)}. Are you " f"perhaps running multi-run inference ? 
If so, this is not " f"compatible with batch_saving, please deactivate either one.") # Compute the panoptic partition if not already done if output.obj_index_pred is None: output = self._forward_partition(batch, output, force=True) # Store the output predictions in conveniently-accessible # attributes in the NAG, for easy downstream use of the saved # object sp_y_pred, sp_obj_index_pred, sp_obj_pred = ( output.superpoint_panoptic_pred()) vox_y_pred, vox_obj_index_pred, vox_obj_pred = ( output.voxel_panoptic_pred(super_index=batch[0].super_index)) batch[1].obj_y_pred = sp_y_pred batch[1].obj_index_pred = sp_obj_index_pred batch[1].obj_pred = sp_obj_pred batch[0].obj_y_pred = vox_y_pred batch[0].obj_index_pred = vox_obj_index_pred batch[0].obj_pred = vox_obj_pred batch[1].edge_affinity_logits = output.edge_affinity_logits # Parent behavior for saving semantic segmentation prediction super().track_batch(batch, batch_idx, output, folder=folder) def load_state_dict(self, state_dict: Dict, strict: bool = True) -> None: """Basic `load_state_dict` from `torch.nn.Module` with a bit of acrobatics due to `criterion.weight`. This attribute, when present in the `state_dict`, causes `load_state_dict` to crash. More precisely, `criterion.weight` is holding the per-class weights for classification losses. 
""" # Special treatment for BCEWithLogitsLoss if self.edge_affinity_criterion.pos_weight is not None: pos_weight_bckp = self.edge_affinity_criterion.pos_weight self.edge_affinity_criterion.pos_weight = None if 'edge_affinity_criterion.pos_weight' in state_dict.keys(): pos_weight = state_dict.pop('edge_affinity_criterion.pos_weight') else: pos_weight = None # Load the state_dict super().load_state_dict(state_dict, strict=strict) # If need be, assign the class weights to the criterion if self.edge_affinity_criterion.pos_weight is not None: self.edge_affinity_criterion.pos_weight = pos_weight \ if pos_weight is not None else pos_weight_bckp def _load_from_checkpoint( self, checkpoint_path: str, **kwargs ) -> 'PanopticSegmentationModule': """Simpler version of `LightningModule.load_from_checkpoint()` for easier use: no need to explicitly pass `model.net`, `model.criterion`, etc. """ return self.__class__.load_from_checkpoint( checkpoint_path, net=self.net, edge_affinity_head=self.edge_affinity_head, partitioner=self.partitioner, criterion=self.criterion, **kwargs) # TODO: gridsearch instance partition parameters if __name__ == "__main__": import hydra import omegaconf import pyrootutils root = str(pyrootutils.setup_root(__file__, pythonpath=True)) cfg = omegaconf.OmegaConf.load(root + "/configs/model/panoptic/spt-2.yaml") _ = hydra.utils.instantiate(cfg)