import numpy as np from sklearn.preprocessing import RobustScaler from scipy import sparse from sklearn.utils import check_array from sklearn.utils.validation import FLOAT_DTYPES from sklearn.preprocessing.data import _handle_zeros_in_scale class RobustNanScaler(RobustScaler): def _check_array(self, X, copy): """Makes sure centering is not enabled for sparse matrices.""" X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite=False) if sparse.issparse(X): if self.with_centering: raise ValueError( "Cannot center sparse matrices: use `with_centering=False`" " instead. See docstring for motivation and alternatives.") return X def fit(self, X, y=None): if sparse.issparse(X): raise TypeError("RobustScaler cannot be fitted on sparse inputs") X = self._check_array(X, self.copy) if self.with_centering: self.center_ = np.nanmedian(X, axis=0) if self.with_scaling: q_min, q_max = self.quantile_range if not 0 <= q_min <= q_max <= 100: raise ValueError("Invalid quantile range: %s" % str(self.quantile_range)) q = np.nanpercentile(X, self.quantile_range, axis=0) self.scale_ = (q[1] - q[0]) self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False) return self