cascade / nfp /preprocessing /scaling.py
bobbypaton
Initial CASCADE HF Space deployment
233f6d4
import numpy as np
from sklearn.preprocessing import RobustScaler
from scipy import sparse
from sklearn.utils import check_array
from sklearn.utils.validation import FLOAT_DTYPES
from sklearn.preprocessing.data import _handle_zeros_in_scale
class RobustNanScaler(RobustScaler):
    """``RobustScaler`` variant that tolerates NaN entries in its input.

    Two hooks of the parent class are overridden:

    * ``_check_array`` validates input with the finiteness check switched
      off, so missing values are not rejected.
    * ``fit`` derives ``center_`` and ``scale_`` with ``np.nanmedian`` and
      ``np.nanpercentile`` so NaN entries are skipped per column.
    """

    def _check_array(self, X, copy):
        """Validate ``X`` while letting non-finite values through.

        Parameters
        ----------
        X : array-like or sparse matrix
            Data to validate; CSR and CSC sparse formats are accepted.
        copy : bool
            Forwarded to ``check_array`` as its ``copy`` argument.

        Returns
        -------
        The validated input, converted to a float dtype by ``check_array``.

        Raises
        ------
        ValueError
            If ``X`` is sparse while ``self.with_centering`` is enabled.
        """
        # Honour the caller-supplied ``copy`` flag instead of silently
        # substituting ``self.copy``.
        # NOTE(review): ``force_all_finite=False`` disables the finiteness
        # check entirely, so +/-inf is accepted along with NaN -- confirm
        # that is intended.
        X = check_array(X, accept_sparse=('csr', 'csc'), copy=copy,
                        estimator=self, dtype=FLOAT_DTYPES,
                        force_all_finite=False)

        if sparse.issparse(X) and self.with_centering:
            raise ValueError(
                "Cannot center sparse matrices: use `with_centering=False`"
                " instead. See docstring for motivation and alternatives.")
        return X

    def fit(self, X, y=None):
        """Compute the NaN-aware median and quantile range used for scaling.

        Parameters
        ----------
        X : array-like
            Dense training data; statistics are taken along axis 0.
        y : ignored
            Not used by this method.

        Returns
        -------
        self

        Raises
        ------
        TypeError
            If ``X`` is a sparse matrix.
        ValueError
            If scaling is enabled and ``self.quantile_range`` does not
            satisfy ``0 <= q_min <= q_max <= 100``.
        """
        if sparse.issparse(X):
            raise TypeError("RobustScaler cannot be fitted on sparse inputs")
        X = self._check_array(X, self.copy)

        if self.with_centering:
            # Column-wise median that skips NaN entries.
            self.center_ = np.nanmedian(X, axis=0)

        if self.with_scaling:
            q_min, q_max = self.quantile_range
            if not 0 <= q_min <= q_max <= 100:
                raise ValueError("Invalid quantile range: %s" %
                                 str(self.quantile_range))

            # Row 0 holds the lower percentile of every column, row 1 the
            # upper one; NaN entries are ignored.
            q = np.nanpercentile(X, self.quantile_range, axis=0)
            # The spread is passed through the zero-scale helper before
            # being stored as the scale.
            self.scale_ = _handle_zeros_in_scale(q[1] - q[0], copy=False)
        return self