"""
Contains abstract functionality for learning locally linear sparse model.
"""
import numpy as np
import scipy as sp
from sklearn.linear_model import Ridge, lars_path
from sklearn.utils import check_random_state
class LimeBase(object):
    """Class for learning a locally linear sparse model from perturbed data"""

    def __init__(self,
                 kernel_fn,
                 verbose=False,
                 random_state=None):
        """Init function.

        Args:
            kernel_fn: function that transforms an array of distances into an
                array of proximity values (floats).
            verbose: if true, print local prediction values from linear model.
            random_state: an integer or numpy.RandomState that will be used to
                generate random numbers. If None, the random state will be
                initialized using the internal numpy seed.
        """
        self.kernel_fn = kernel_fn
        self.verbose = verbose
        # Normalize whatever the caller passed (None / int / RandomState)
        # into a numpy RandomState for downstream estimators.
        self.random_state = check_random_state(random_state)
| def generate_lars_path(weighted_data, weighted_labels): | |
| """Generates the lars path for weighted data. | |
| Args: | |
| weighted_data: data that has been weighted by kernel | |
| weighted_label: labels, weighted by kernel | |
| Returns: | |
| (alphas, coefs), both are arrays corresponding to the | |
| regularization parameter and coefficients, respectively | |
| """ | |
| x_vector = weighted_data | |
| alphas, _, coefs = lars_path(x_vector, | |
| weighted_labels, | |
| method='lasso', | |
| verbose=False) | |
| return alphas, coefs | |
| def forward_selection(self, data, labels, weights, num_features): | |
| """Iteratively adds features to the model""" | |
| clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state) | |
| used_features = [] | |
| for _ in range(min(num_features, data.shape[1])): | |
| max_ = -100000000 | |
| best = 0 | |
| for feature in range(data.shape[1]): | |
| if feature in used_features: | |
| continue | |
| clf.fit(data[:, used_features + [feature]], labels, | |
| sample_weight=weights) | |
| score = clf.score(data[:, used_features + [feature]], | |
| labels, | |
| sample_weight=weights) | |
| if score > max_: | |
| best = feature | |
| max_ = score | |
| used_features.append(best) | |
| return np.array(used_features) | |
| def feature_selection(self, data, labels, weights, num_features, method): | |
| """Selects features for the model. see explain_instance_with_data to | |
| understand the parameters.""" | |
| if method == 'none': | |
| return np.array(range(data.shape[1])) | |
| elif method == 'forward_selection': | |
| return self.forward_selection(data, labels, weights, num_features) | |
| elif method == 'highest_weights': | |
| clf = Ridge(alpha=0.01, fit_intercept=True, | |
| random_state=self.random_state) | |
| # print("data shape: ", data.shape) | |
| # print("labels shape: ", labels.shape) | |
| # assert(False) | |
| clf.fit(data, labels, sample_weight=weights) | |
| coef = clf.coef_ | |
| if sp.sparse.issparse(data): | |
| coef = sp.sparse.csr_matrix(clf.coef_) | |
| weighted_data = coef.multiply(data[0]) | |
| # Note: most efficient to slice the data before reversing | |
| sdata = len(weighted_data.data) | |
| argsort_data = np.abs(weighted_data.data).argsort() | |
| # Edge case where data is more sparse than requested number of feature importances | |
| # In that case, we just pad with zero-valued features | |
| if sdata < num_features: | |
| nnz_indexes = argsort_data[::-1] | |
| indices = weighted_data.indices[nnz_indexes] | |
| num_to_pad = num_features - sdata | |
| indices = np.concatenate((indices, np.zeros(num_to_pad, dtype=indices.dtype))) | |
| indices_set = set(indices) | |
| pad_counter = 0 | |
| for i in range(data.shape[1]): | |
| if i not in indices_set: | |
| indices[pad_counter + sdata] = i | |
| pad_counter += 1 | |
| if pad_counter >= num_to_pad: | |
| break | |
| else: | |
| nnz_indexes = argsort_data[sdata - num_features:sdata][::-1] | |
| indices = weighted_data.indices[nnz_indexes] | |
| return indices | |
| else: | |
| weighted_data = coef * data[0] | |
| feature_weights = sorted( | |
| zip(range(data.shape[1]), weighted_data), | |
| key=lambda x: np.abs(x[1]), | |
| reverse=True) | |
| return np.array([x[0] for x in feature_weights[:num_features]]) | |
| elif method == 'lasso_path': | |
| weighted_data = ((data - np.average(data, axis=0, weights=weights)) | |
| * np.sqrt(weights[:, np.newaxis])) | |
| weighted_labels = ((labels - np.average(labels, weights=weights)) | |
| * np.sqrt(weights)) | |
| nonzero = range(weighted_data.shape[1]) | |
| _, coefs = self.generate_lars_path(weighted_data, | |
| weighted_labels) | |
| for i in range(len(coefs.T) - 1, 0, -1): | |
| nonzero = coefs.T[i].nonzero()[0] | |
| if len(nonzero) <= num_features: | |
| break | |
| used_features = nonzero | |
| return used_features | |
| elif method == 'auto': | |
| if num_features <= 6: | |
| n_method = 'forward_selection' | |
| else: | |
| n_method = 'highest_weights' | |
| return self.feature_selection(data, labels, weights, | |
| num_features, n_method) | |
| def explain_instance_with_data(self, | |
| neighborhood_data, | |
| neighborhood_labels, | |
| distances, | |
| label, | |
| num_features, | |
| feature_selection='auto', | |
| model_regressor=None): | |
| """Takes perturbed data, labels and distances, returns explanation. | |
| Args: | |
| neighborhood_data: perturbed data, 2d array. first element is | |
| assumed to be the original data point. | |
| neighborhood_labels: corresponding perturbed labels. should have as | |
| many columns as the number of possible labels. | |
| distances: distances to original data point. | |
| label: label for which we want an explanation | |
| num_features: maximum number of features in explanation | |
| feature_selection: how to select num_features. options are: | |
| 'forward_selection': iteratively add features to the model. | |
| This is costly when num_features is high | |
| 'highest_weights': selects the features that have the highest | |
| product of absolute weight * original data point when | |
| learning with all the features | |
| 'lasso_path': chooses features based on the lasso | |
| regularization path | |
| 'none': uses all features, ignores num_features | |
| 'auto': uses forward_selection if num_features <= 6, and | |
| 'highest_weights' otherwise. | |
| model_regressor: sklearn regressor to use in explanation. | |
| Defaults to Ridge regression if None. Must have | |
| model_regressor.coef_ and 'sample_weight' as a parameter | |
| to model_regressor.fit() | |
| Returns: | |
| (intercept, exp, score, local_pred): | |
| intercept is a float. | |
| exp is a sorted list of tuples, where each tuple (x,y) corresponds | |
| to the feature id (x) and the local weight (y). The list is sorted | |
| by decreasing absolute value of y. | |
| score is the R^2 value of the returned explanation | |
| local_pred is the prediction of the explanation model on the original instance | |
| """ | |
| weights = self.kernel_fn(distances) | |
| labels_column = neighborhood_labels[:, label] | |
| used_features = self.feature_selection(neighborhood_data, | |
| labels_column, | |
| weights, | |
| num_features, | |
| feature_selection) | |
| if model_regressor is None: | |
| model_regressor = Ridge(alpha=1, fit_intercept=True, | |
| random_state=self.random_state) | |
| easy_model = model_regressor | |
| easy_model.fit(neighborhood_data[:, used_features], | |
| labels_column, sample_weight=weights) | |
| prediction_score = easy_model.score( | |
| neighborhood_data[:, used_features], | |
| labels_column, sample_weight=weights) | |
| local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1)) | |
| if self.verbose: | |
| print('Intercept', easy_model.intercept_) | |
| print('Prediction_local', local_pred,) | |
| print('Right:', neighborhood_labels[0, label]) | |
| return (easy_model.intercept_, | |
| sorted(zip(used_features, easy_model.coef_), | |
| key=lambda x: np.abs(x[1]), reverse=True), | |
| prediction_score, local_pred) | |