Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| from typing import Any, Callable, cast, Generic, List, Tuple, Type, Union | |
| import torch | |
| import torch.nn.functional as F | |
| from captum._utils.common import ( | |
| _format_additional_forward_args, | |
| _format_tensor_into_tuples, | |
| _run_forward, | |
| _validate_target, | |
| ) | |
| from captum._utils.gradient import compute_gradients | |
| from captum._utils.typing import ModuleOrModuleList, TargetType | |
| from captum.attr._utils.common import ( | |
| _format_input_baseline, | |
| _sum_rows, | |
| _tensorize_baseline, | |
| _validate_input, | |
| ) | |
| from captum.log import log_usage | |
| from torch import Tensor | |
| from torch.nn import Module | |
class Attribution:
    r"""
    All attribution algorithms extend this class. It enforces its child classes
    to extend and override core `attribute` method.
    """

    def __init__(self, forward_func: Callable) -> None:
        r"""
        Args:
            forward_func (callable or torch.nn.Module): This can either be an instance
                        of pytorch model or any modification of model's forward
                        function.
        """
        self.forward_func = forward_func

    # Declared here so that every sub-class exposes a uniform entry point;
    # deriving classes are responsible for providing the implementation.
    attribute: Callable
    r"""
    This method computes and returns the attribution values for each input tensor.
    Deriving classes are responsible for implementing its logic accordingly.

    Specific attribution algorithms that extend this class take relevant
    arguments.

    Args:
        inputs (tensor or tuple of tensors): Input for which attribution
                    is computed. It can be provided as a single tensor or
                    a tuple of multiple tensors. If multiple input tensors
                    are provided, the batch sizes must be aligned across all
                    tensors.

    Returns:
        *tensor* or tuple of *tensors* of **attributions**:
        - **attributions** (*tensor* or tuple of *tensors*):
                    Attribution values for each
                    input tensor. The `attributions` have the same shape and
                    dimensionality as the inputs.
                    If a single tensor is provided as inputs, a single tensor
                    is returned. If a tuple is provided for inputs, a tuple of
                    corresponding sized tensors is returned.
    """

    def multiplies_by_inputs(self) -> bool:
        # Default: the algorithm does not scale attributions by the inputs.
        # Sub-classes (e.g. perturbation-based ones) override this.
        return False

    def has_convergence_delta(self) -> bool:
        r"""
        This method informs the user whether the attribution algorithm provides
        a convergence delta (aka an approximation error) or not. Convergence
        delta may serve as a proxy of correctness of attribution algorithm's
        approximation. If deriving attribution class provides a
        `compute_convergence_delta` method, it should
        override both `compute_convergence_delta` and `has_convergence_delta` methods.

        Returns:
            bool:
            Returns whether the attribution algorithm
            provides a convergence delta (aka approximation error) or not.
        """
        return False

    compute_convergence_delta: Callable
    r"""
    The attribution algorithms which derive `Attribution` class and provide
    convergence delta (aka approximation error) should implement this method.
    Convergence delta can be computed based on certain properties of the
    attribution algorithms.

    Args:
        attributions (tensor or tuple of tensors): Attribution scores that
                    are precomputed by an attribution algorithm.
                    Attributions can be provided in form of a single tensor
                    or a tuple of those. It is assumed that attribution
                    tensor's dimension 0 corresponds to the number of
                    examples, and if multiple input tensors are provided,
                    the examples must be aligned appropriately.
        *args (optional): Additional arguments that are used by the
                    sub-classes depending on the specific implementation
                    of `compute_convergence_delta`.

    Returns:
        *tensor* of **deltas**:
        - **deltas** (*tensor*):
                    Depending on specific implementation of
                    sub-classes, convergence delta can be returned per
                    sample in form of a tensor or it can be aggregated
                    across multiple samples and returned in form of a
                    single floating point tensor.
    """

    @classmethod
    def get_name(cls: Type["Attribution"]) -> str:
        r"""
        Create readable class name by inserting a space before any capital
        characters besides the very first.

        Returns:
            str: a readable class name

        Example:
            for a class called IntegratedGradients, we return the string
            'Integrated Gradients'
        """
        # Without @classmethod, calling this on an instance would bind the
        # instance as `cls` and fail on `cls.__name__`.
        return "".join(
            [
                char if char.islower() or idx == 0 else " " + char
                for idx, char in enumerate(cls.__name__)
            ]
        )
class GradientAttribution(Attribution):
    r"""
    All gradient based attribution algorithms extend this class. It requires a
    forward function, which most commonly is the forward function of the model
    that we want to interpret or the model itself.
    """

    def __init__(self, forward_func: Callable) -> None:
        r"""
        Args:
            forward_func (callable or torch.nn.Module): This can either be an instance
                        of pytorch model or any modification of model's forward
                        function.
        """
        Attribution.__init__(self, forward_func)
        # Default gradient helper; sub-classes may replace this attribute to
        # customize how gradients w.r.t. inputs are computed.
        self.gradient_func = compute_gradients

    def compute_convergence_delta(
        self,
        attributions: Union[Tensor, Tuple[Tensor, ...]],
        start_point: Union[
            None, int, float, Tensor, Tuple[Union[int, float, Tensor], ...]
        ],
        end_point: Union[Tensor, Tuple[Tensor, ...]],
        target: TargetType = None,
        additional_forward_args: Any = None,
    ) -> Tensor:
        r"""
        Here we provide a specific implementation for `compute_convergence_delta`
        which is based on a common property among gradient-based attribution algorithms.
        In the literature sometimes it is also called completeness axiom. Completeness
        axiom states that the sum of the attribution must be equal to the differences of
        NN Models's function at its end and start points. In other words:
        sum(attributions) - (F(end_point) - F(start_point)) is close to zero.
        Returned delta of this method is defined as above stated difference.

        This implementation assumes that both the `start_point` and `end_point` have
        the same shape and dimensionality. It also assumes that the target must have
        the same number of examples as the `start_point` and the `end_point` in case
        it is provided in form of a list or a non-singleton tensor.

        Args:
            attributions (tensor or tuple of tensors): Precomputed attribution
                        scores. The user can compute those using any attribution
                        algorithm. It is assumed that the shape and the
                        dimensionality of attributions must match the shape and
                        the dimensionality of `start_point` and `end_point`.
                        It also assumes that the attribution tensor's
                        dimension 0 corresponds to the number of
                        examples, and if multiple input tensors are provided,
                        the examples must be aligned appropriately.
            start_point (tensor or tuple of tensors, optional): `start_point`
                        is passed as an input to model's forward function. It
                        is the starting point of attributions' approximation.
                        It is assumed that both `start_point` and `end_point`
                        have the same shape and dimensionality.
            end_point (tensor or tuple of tensors): `end_point`
                        is passed as an input to model's forward function. It
                        is the end point of attributions' approximation.
                        It is assumed that both `start_point` and `end_point`
                        have the same shape and dimensionality.
            target (int, tuple, tensor or list, optional): Output indices for
                        which gradients are computed (for classification cases,
                        this is usually the target class).
                        If the network returns a scalar value per example,
                        no target index is necessary.
                        For general 2D outputs, targets can be either:

                        - a single integer or a tensor containing a single
                          integer, which is applied to all input examples
                        - a list of integers or a 1D tensor, with length matching
                          the number of examples in inputs (dim 0). Each integer
                          is applied as the target for the corresponding example.

                        For outputs with > 2 dimensions, targets can be either:

                        - A single tuple, which contains #output_dims - 1
                          elements. This target index is applied to all examples.
                        - A list of tuples with length equal to the number of
                          examples in inputs (dim 0), and each tuple containing
                          #output_dims - 1 elements. Each tuple is applied as the
                          target for the corresponding example.

                        Default: None
            additional_forward_args (any, optional): If the forward function
                        requires additional arguments other than the inputs for
                        which attributions should not be computed, this argument
                        can be provided. It must be either a single additional
                        argument of a Tensor or arbitrary (non-tuple) type or a
                        tuple containing multiple additional arguments including
                        tensors or any arbitrary python types. These arguments
                        are provided to forward_func in order following the
                        arguments in inputs.
                        For a tensor, the first dimension of the tensor must
                        correspond to the number of examples.
                        `additional_forward_args` is used both for `start_point`
                        and `end_point` when computing the forward pass.
                        Default: None

        Returns:
            *tensor* of **deltas**:
            - **deltas** (*tensor*):
                This implementation returns convergence delta per
                sample. Deriving sub-classes may do any type of aggregation
                of those values, if necessary.
        """
        # Normalize inputs/baselines into tuples of tensors so the rest of the
        # method can iterate uniformly over multiple input tensors.
        end_point, start_point = _format_input_baseline(end_point, start_point)
        additional_forward_args = _format_additional_forward_args(
            additional_forward_args
        )
        # tensorizing start_point in case it is a scalar or one example baseline
        # If the batch size is large we could potentially also tensorize only one
        # sample and expand the output to the rest of the elements in the batch
        start_point = _tensorize_baseline(end_point, start_point)
        attributions = _format_tensor_into_tuples(attributions)

        # verify that the attributions and end_point match on 1st dimension
        for attribution, end_point_tnsr in zip(attributions, end_point):
            assert end_point_tnsr.shape[0] == attribution.shape[0], (
                "Attributions tensor and the end_point must match on the first"
                " dimension but found attribution: {} and end_point: {}".format(
                    attribution.shape[0], end_point_tnsr.shape[0]
                )
            )

        num_samples = end_point[0].shape[0]
        _validate_input(end_point, start_point)
        _validate_target(num_samples, target)

        # Only forward outputs are needed to compute the delta, so both forward
        # passes run without building an autograd graph.
        with torch.no_grad():
            start_out_sum = _sum_rows(
                _run_forward(
                    self.forward_func, start_point, target, additional_forward_args
                )
            )

            end_out_sum = _sum_rows(
                _run_forward(
                    self.forward_func, end_point, target, additional_forward_args
                )
            )
            # Per-example attribution sums, accumulated across all input
            # tensors, stacked into a single 1D tensor of length num_samples.
            row_sums = [_sum_rows(attribution) for attribution in attributions]
            attr_sum = torch.stack(
                [cast(Tensor, sum(row_sum)) for row_sum in zip(*row_sums)]
            )
            # Completeness residual: sum(attr) - (F(end) - F(start)) per sample.
            _delta = attr_sum - (end_out_sum - start_out_sum)
        return _delta
class PerturbationAttribution(Attribution):
    r"""
    Base class for all perturbation based attribution algorithms. A forward
    function is required; most commonly this is the forward function of the
    model being interpreted, or the model itself.
    """

    def __init__(self, forward_func: Callable) -> None:
        r"""
        Args:
            forward_func (callable or torch.nn.Module): This can either be an instance
                        of pytorch model or any modification of model's forward
                        function.
        """
        super().__init__(forward_func)

    def multiplies_by_inputs(self) -> bool:
        # Perturbation-based attributions are reported in the scale of the
        # inputs themselves.
        return True
class InternalAttribution(Attribution, Generic[ModuleOrModuleList]):
    # The layer (or list of layers) this attribution method targets.
    layer: ModuleOrModuleList
    r"""
    Shared base class for LayerAttribution and NeuronAttribution,
    attribution types that require a model and a particular layer.
    """

    def __init__(
        self,
        forward_func: Callable,
        layer: ModuleOrModuleList,
        device_ids: Union[None, List[int]] = None,
    ) -> None:
        r"""
        Args:
            forward_func (callable or torch.nn.Module): This can either be an instance
                        of pytorch model or any modification of model's forward
                        function.
            layer (torch.nn.Module): Layer for which output attributions are computed.
                        Output size of attribute matches that of layer output.
            device_ids (list(int)): Device ID list, necessary only if forward_func
                        applies a DataParallel model, which allows reconstruction of
                        intermediate outputs from batched results across devices.
                        If forward_func is given as the DataParallel model itself,
                        then it is not necessary to provide this argument.
        """
        super().__init__(forward_func)
        self.device_ids = device_ids
        self.layer = layer
class LayerAttribution(InternalAttribution):
    r"""
    Layer attribution provides attribution values for the given layer, quantifying
    the importance of each neuron within the given layer's output. The output
    attribution of calling attribute on a LayerAttribution object always matches
    the size of the layer output.
    """

    def __init__(
        self,
        forward_func: Callable,
        layer: ModuleOrModuleList,
        device_ids: Union[None, List[int]] = None,
    ) -> None:
        r"""
        Args:
            forward_func (callable or torch.nn.Module): This can either be an instance
                        of pytorch model or any modification of model's forward
                        function.
            layer (torch.nn.Module): Layer for which output attributions are computed.
                        Output size of attribute matches that of layer output.
            device_ids (list(int)): Device ID list, necessary only if forward_func
                        applies a DataParallel model, which allows reconstruction of
                        intermediate outputs from batched results across devices.
                        If forward_func is given as the DataParallel model itself,
                        then it is not necessary to provide this argument.
        """
        InternalAttribution.__init__(self, forward_func, layer, device_ids)

    # @staticmethod is required: `interpolate` takes no `self`, and without the
    # decorator an instance call would bind the instance as `layer_attribution`.
    @staticmethod
    def interpolate(
        layer_attribution: Tensor,
        interpolate_dims: Union[int, Tuple[int, ...]],
        interpolate_mode: str = "nearest",
    ) -> Tensor:
        r"""
        Interpolates given 3D, 4D or 5D layer attribution to given dimensions.
        This is often utilized to upsample the attribution of a convolutional layer
        to the size of an input, which allows visualizing in the input space.

        Args:
            layer_attribution (torch.Tensor): Tensor of given layer attributions.
            interpolate_dims (int or tuple): Upsampled dimensions. The
                        number of elements must be the number of dimensions
                        of layer_attribution - 2, since the first dimension
                        corresponds to number of examples and the second is
                        assumed to correspond to the number of channels.
            interpolate_mode (str): Method for interpolation, which
                        must be a valid input interpolation mode for
                        torch.nn.functional. These methods are
                        "nearest", "area", "linear" (3D-only), "bilinear"
                        (4D-only), "bicubic" (4D-only), "trilinear" (5D-only)
                        based on the number of dimensions of the given layer
                        attribution.

        Returns:
            *tensor* of upsampled **attributions**:
            - **attributions** (*tensor*):
                Upsampled layer attributions with first 2 dimensions matching
                layer_attribution and remaining dimensions given by
                interpolate_dims.
        """
        return F.interpolate(layer_attribution, interpolate_dims, mode=interpolate_mode)
class NeuronAttribution(InternalAttribution):
    r"""
    Neuron attribution provides input attribution for a given neuron, quantifying
    the importance of each input feature in the activation of a particular neuron.
    Calling attribute on a NeuronAttribution object requires also providing
    the index of the neuron in the output of the given layer for which attributions
    are required.
    The output attribution of calling attribute on a NeuronAttribution object
    always matches the size of the input.
    """

    def __init__(
        self,
        forward_func: Callable,
        layer: Module,
        device_ids: Union[None, List[int]] = None,
    ) -> None:
        r"""
        Args:
            forward_func (callable or torch.nn.Module): This can either be an instance
                        of pytorch model or any modification of model's forward
                        function.
            layer (torch.nn.Module): Layer for which output attributions are computed.
                        Output size of attribute matches that of layer output.
            device_ids (list(int)): Device ID list, necessary only if forward_func
                        applies a DataParallel model, which allows reconstruction of
                        intermediate outputs from batched results across devices.
                        If forward_func is given as the DataParallel model itself,
                        then it is not necessary to provide this argument.
        """
        super().__init__(forward_func, layer, device_ids)

    # Uniform entry point; concrete neuron attribution algorithms implement it.
    attribute: Callable
    r"""
    This method computes and returns the neuron attribution values for each
    input tensor. Deriving classes are responsible for implementing
    its logic accordingly.

    Specific attribution algorithms that extend this class take relevant
    arguments.

    Args:
        inputs: A single high dimensional input tensor or a tuple of them.
        neuron_selector (int or tuple): Tuple providing index of neuron in output
                    of given layer for which attribution is desired. Length of
                    this tuple must be one less than the number of
                    dimensions in the output of the given layer (since
                    dimension 0 corresponds to number of examples).

    Returns:
        *tensor* or tuple of *tensors* of **attributions**:
        - **attributions** (*tensor* or tuple of *tensors*):
                Attribution values for
                each input vector. The `attributions` have the
                dimensionality of inputs.
    """