"""Linear and quadratic discriminant analysis."""

import warnings
from numbers import Integral, Real

import numpy as np
import scipy.linalg
from scipy import linalg
|
|
| from .base import ( |
| BaseEstimator, |
| ClassifierMixin, |
| ClassNamePrefixFeaturesOutMixin, |
| TransformerMixin, |
| _fit_context, |
| ) |
| from .covariance import empirical_covariance, ledoit_wolf, shrunk_covariance |
| from .linear_model._base import LinearClassifierMixin |
| from .preprocessing import StandardScaler |
| from .utils._array_api import _expit, device, get_namespace, size |
| from .utils._param_validation import HasMethods, Interval, StrOptions |
| from .utils.extmath import softmax |
| from .utils.multiclass import check_classification_targets, unique_labels |
| from .utils.validation import check_is_fitted, validate_data |
|
|
| __all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"] |
|
|
|
|
def _cov(X, shrinkage=None, covariance_estimator=None):
    """Estimate covariance matrix (using optional covariance_estimator).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data.

    shrinkage : {'empirical', 'auto'} or float, default=None
        Shrinkage parameter, possible values:
          - None or 'empirical': no shrinkage (default).
          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
          - float between 0 and 1: fixed shrinkage parameter.

        Shrinkage parameter is ignored if `covariance_estimator`
        is not None.

    covariance_estimator : estimator, default=None
        If not None, `covariance_estimator` is used to estimate
        the covariance matrices instead of relying on the empirical
        covariance estimator (with potential shrinkage).
        The object should have a fit method and a ``covariance_`` attribute
        like the estimators in :mod:`sklearn.covariance`.
        If None, the shrinkage parameter drives the estimate.

        .. versionadded:: 0.24

    Returns
    -------
    s : ndarray of shape (n_features, n_features)
        Estimated covariance matrix.
    """
    if covariance_estimator is None:
        shrinkage = "empirical" if shrinkage is None else shrinkage
        if isinstance(shrinkage, str):
            if shrinkage == "auto":
                sc = StandardScaler()  # standardize features
                X = sc.fit_transform(X)
                s = ledoit_wolf(X)[0]
                # Rescale back to the original feature scale.
                s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
            elif shrinkage == "empirical":
                s = empirical_covariance(X)
        elif isinstance(shrinkage, Real):
            s = shrunk_covariance(empirical_covariance(X), shrinkage)
    else:
        if shrinkage is not None and shrinkage != 0:
            raise ValueError(
                "covariance_estimator and shrinkage parameters "
                "are not None. Only one of the two can be set."
            )
        covariance_estimator.fit(X)
        if not hasattr(covariance_estimator, "covariance_"):
            raise ValueError(
                "%s does not have a covariance_ attribute"
                % covariance_estimator.__class__.__name__
            )
        s = covariance_estimator.covariance_
    return s
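
# A minimal usage sketch for the private `_cov` helper (illustrative only;
# the random data below is hypothetical, not part of the module):
#
#     rng = np.random.RandomState(0)
#     X = rng.randn(20, 3)
#     _cov(X).shape                  # empirical estimate -> (3, 3)
#     _cov(X, shrinkage=0.5).shape   # shrunk estimate    -> (3, 3)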
|
|
|
|
def _class_means(X, y):
    """Compute class means.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data.

    y : array-like of shape (n_samples,) or (n_samples, n_targets)
        Target values.

    Returns
    -------
    means : array-like of shape (n_classes, n_features)
        Class means.
    """
    xp, is_array_api_compliant = get_namespace(X)
    classes, y = xp.unique_inverse(y)
    means = xp.zeros((classes.shape[0], X.shape[1]), device=device(X), dtype=X.dtype)

    if is_array_api_compliant:
        for i in range(classes.shape[0]):
            means[i, :] = xp.mean(X[y == i], axis=0)
    else:
        # NumPy fast path: accumulate the per-class row sums in place with
        # `np.add.at`, then divide by the class counts.
        cnt = np.bincount(y)
        np.add.at(means, y, X)
        means /= cnt[:, None]
    return means
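
# Sketch of the NumPy fast path above (illustrative; the small arrays are
# hypothetical example data): `np.add.at` sums each row of X into its class
# slot, and dividing by the class counts yields the per-class means.
#
#     X = np.array([[0.0, 0.0], [2.0, 2.0], [4.0, 6.0]])
#     y = np.array([0, 0, 1])
#     _class_means(X, y)  # -> array([[1., 1.], [4., 6.]])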
|
|
|
|
def _class_cov(X, y, priors, shrinkage=None, covariance_estimator=None):
    """Compute weighted within-class covariance matrix.

    The per-class covariances are weighted by the class priors.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data.

    y : array-like of shape (n_samples,) or (n_samples, n_targets)
        Target values.

    priors : array-like of shape (n_classes,)
        Class priors.

    shrinkage : 'auto' or float, default=None
        Shrinkage parameter, possible values:
          - None: no shrinkage (default).
          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
          - float between 0 and 1: fixed shrinkage parameter.

        Shrinkage parameter is ignored if `covariance_estimator` is not None.

    covariance_estimator : estimator, default=None
        If not None, `covariance_estimator` is used to estimate
        the covariance matrices instead of relying on the empirical
        covariance estimator (with potential shrinkage).
        The object should have a fit method and a ``covariance_`` attribute
        like the estimators in :mod:`sklearn.covariance`.
        If None, the shrinkage parameter drives the estimate.

        .. versionadded:: 0.24

    Returns
    -------
    cov : array-like of shape (n_features, n_features)
        Weighted within-class covariance matrix.
    """
    classes = np.unique(y)
    cov = np.zeros(shape=(X.shape[1], X.shape[1]))
    for idx, group in enumerate(classes):
        Xg = X[y == group, :]
        cov += priors[idx] * np.atleast_2d(_cov(Xg, shrinkage, covariance_estimator))
    return cov
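
# Equivalently, the loop above computes `sum_k prior_k * C_k`. A hand-rolled
# sketch in terms of the function's own arguments (illustrative):
#
#     covs = [np.atleast_2d(_cov(X[y == g])) for g in np.unique(y)]
#     within = sum(p * c for p, c in zip(priors, covs))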
|
|
|
|
| class DiscriminantAnalysisPredictionMixin: |
| """Mixin class for QuadraticDiscriminantAnalysis and NearestCentroid.""" |
|
|
| def decision_function(self, X): |
| """Apply decision function to an array of samples. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| Array of samples (test vectors). |
| |
| Returns |
| ------- |
| y_scores : ndarray of shape (n_samples,) or (n_samples, n_classes) |
| Decision function values related to each class, per sample. |
| In the two-class case, the shape is `(n_samples,)`, giving the |
| log likelihood ratio of the positive class. |
| """ |
| y_scores = self._decision_function(X) |
| if len(self.classes_) == 2: |
| return y_scores[:, 1] - y_scores[:, 0] |
| return y_scores |
|
|
| def predict(self, X): |
| """Perform classification on an array of vectors `X`. |
| |
| Returns the class label for each sample. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| Input vectors, where `n_samples` is the number of samples and |
| `n_features` is the number of features. |
| |
| Returns |
| ------- |
| y_pred : ndarray of shape (n_samples,) |
| Class label for each sample. |
| """ |
| scores = self._decision_function(X) |
| return self.classes_.take(scores.argmax(axis=1)) |
|
|
| def predict_proba(self, X): |
| """Estimate class probabilities. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| Input data. |
| |
| Returns |
| ------- |
| y_proba : ndarray of shape (n_samples, n_classes) |
| Probability estimate of the sample for each class in the |
| model, where classes are ordered as they are in `self.classes_`. |
| """ |
| return np.exp(self.predict_log_proba(X)) |
|
|
| def predict_log_proba(self, X): |
| """Estimate log class probabilities. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| Input data. |
| |
| Returns |
| ------- |
| y_log_proba : ndarray of shape (n_samples, n_classes) |
| Estimated log probabilities. |
| """ |
| scores = self._decision_function(X) |
| log_likelihood = scores - scores.max(axis=1)[:, np.newaxis] |
| return log_likelihood - np.log( |
| np.exp(log_likelihood).sum(axis=1)[:, np.newaxis] |
| ) |
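
# Note on `predict_log_proba` above: subtracting the row-wise maximum before
# exponentiating is the standard log-sum-exp trick. A quick sketch of why it
# matters (illustrative values):
#
#     scores = np.array([[1000.0, 1001.0]])
#     np.log(np.exp(scores) / np.exp(scores).sum())  # exp overflows -> nan
#     shifted = scores - scores.max(axis=1, keepdims=True)
#     shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
#     # -> array([[-1.3133..., -0.3133...]])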
|
|
|
|
| class LinearDiscriminantAnalysis( |
| ClassNamePrefixFeaturesOutMixin, |
| LinearClassifierMixin, |
| TransformerMixin, |
| BaseEstimator, |
| ): |
| """Linear Discriminant Analysis. |
| |
| A classifier with a linear decision boundary, generated by fitting class |
| conditional densities to the data and using Bayes' rule. |
| |
| The model fits a Gaussian density to each class, assuming that all classes |
| share the same covariance matrix. |
| |
| The fitted model can also be used to reduce the dimensionality of the input |
| by projecting it to the most discriminative directions, using the |
| `transform` method. |
| |
| .. versionadded:: 0.17 |
| |
| For a comparison between |
| :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis` |
| and :class:`~sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`, see |
| :ref:`sphx_glr_auto_examples_classification_plot_lda_qda.py`. |
| |
| Read more in the :ref:`User Guide <lda_qda>`. |
| |
| Parameters |
| ---------- |
| solver : {'svd', 'lsqr', 'eigen'}, default='svd' |
| Solver to use, possible values: |
| - 'svd': Singular value decomposition (default). |
| Does not compute the covariance matrix, therefore this solver is |
| recommended for data with a large number of features. |
| - 'lsqr': Least squares solution. |
| Can be combined with shrinkage or custom covariance estimator. |
| - 'eigen': Eigenvalue decomposition. |
| Can be combined with shrinkage or custom covariance estimator. |
| |
| .. versionchanged:: 1.2 |
| `solver="svd"` now has experimental Array API support. See the |
| :ref:`Array API User Guide <array_api>` for more details. |
| |
| shrinkage : 'auto' or float, default=None |
| Shrinkage parameter, possible values: |
| - None: no shrinkage (default). |
| - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. |
| - float between 0 and 1: fixed shrinkage parameter. |
| |
| This should be left to None if `covariance_estimator` is used. |
| Note that shrinkage works only with 'lsqr' and 'eigen' solvers. |
| |
| For a usage example, see |
| :ref:`sphx_glr_auto_examples_classification_plot_lda.py`. |
| |
| priors : array-like of shape (n_classes,), default=None |
| The class prior probabilities. By default, the class proportions are |
| inferred from the training data. |
| |
| n_components : int, default=None |
| Number of components (<= min(n_classes - 1, n_features)) for |
| dimensionality reduction. If None, will be set to |
| min(n_classes - 1, n_features). This parameter only affects the |
| `transform` method. |
| |
| For a usage example, see |
| :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`. |
| |
| store_covariance : bool, default=False |
| If True, explicitly compute the weighted within-class covariance |
| matrix when solver is 'svd'. The matrix is always computed |
| and stored for the other solvers. |
| |
| .. versionadded:: 0.17 |
| |
| tol : float, default=1.0e-4 |
| Absolute threshold for a singular value of X to be considered |
| significant, used to estimate the rank of X. Dimensions whose |
| singular values are non-significant are discarded. Only used if |
| solver is 'svd'. |
| |
| .. versionadded:: 0.17 |
| |
| covariance_estimator : covariance estimator, default=None |
| If not None, `covariance_estimator` is used to estimate |
| the covariance matrices instead of relying on the empirical |
| covariance estimator (with potential shrinkage). |
| The object should have a fit method and a ``covariance_`` attribute |
| like the estimators in :mod:`sklearn.covariance`. |
        If None, the shrinkage parameter drives the estimate.
| |
| This should be left to None if `shrinkage` is used. |
| Note that `covariance_estimator` works only with 'lsqr' and 'eigen' |
| solvers. |
| |
| .. versionadded:: 0.24 |
| |
| Attributes |
| ---------- |
| coef_ : ndarray of shape (n_features,) or (n_classes, n_features) |
| Weight vector(s). |
| |
| intercept_ : ndarray of shape (n_classes,) |
| Intercept term. |
| |
| covariance_ : array-like of shape (n_features, n_features) |
| Weighted within-class covariance matrix. It corresponds to |
| `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the |
| samples in class `k`. The `C_k` are estimated using the (potentially |
| shrunk) biased estimator of covariance. If solver is 'svd', only |
| exists when `store_covariance` is True. |
| |
| explained_variance_ratio_ : ndarray of shape (n_components,) |
| Percentage of variance explained by each of the selected components. |
| If ``n_components`` is not set then all components are stored and the |
| sum of explained variances is equal to 1.0. Only available when eigen |
| or svd solver is used. |
| |
| means_ : array-like of shape (n_classes, n_features) |
| Class-wise means. |
| |
| priors_ : array-like of shape (n_classes,) |
| Class priors (sum to 1). |
| |
| scalings_ : array-like of shape (rank, n_classes - 1) |
| Scaling of the features in the space spanned by the class centroids. |
| Only available for 'svd' and 'eigen' solvers. |
| |
| xbar_ : array-like of shape (n_features,) |
| Overall mean. Only present if solver is 'svd'. |
| |
| classes_ : array-like of shape (n_classes,) |
| Unique class labels. |
| |
| n_features_in_ : int |
| Number of features seen during :term:`fit`. |
| |
| .. versionadded:: 0.24 |
| |
| feature_names_in_ : ndarray of shape (`n_features_in_`,) |
| Names of features seen during :term:`fit`. Defined only when `X` |
| has feature names that are all strings. |
| |
| .. versionadded:: 1.0 |
| |
| See Also |
| -------- |
| QuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis. |
| |
| Examples |
| -------- |
| >>> import numpy as np |
| >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis |
| >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) |
| >>> y = np.array([1, 1, 1, 2, 2, 2]) |
| >>> clf = LinearDiscriminantAnalysis() |
| >>> clf.fit(X, y) |
| LinearDiscriminantAnalysis() |
| >>> print(clf.predict([[-0.8, -1]])) |
| [1] |
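    >>> # The same fitted model can reduce dimensionality; with two classes,
    >>> # at most min(n_classes - 1, n_features) = 1 component is kept.
    >>> clf.transform(X).shape
    (6, 1)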
| """ |
|
|
| _parameter_constraints: dict = { |
| "solver": [StrOptions({"svd", "lsqr", "eigen"})], |
| "shrinkage": [StrOptions({"auto"}), Interval(Real, 0, 1, closed="both"), None], |
| "n_components": [Interval(Integral, 1, None, closed="left"), None], |
| "priors": ["array-like", None], |
| "store_covariance": ["boolean"], |
| "tol": [Interval(Real, 0, None, closed="left")], |
| "covariance_estimator": [HasMethods("fit"), None], |
| } |
|
|
| def __init__( |
| self, |
| solver="svd", |
| shrinkage=None, |
| priors=None, |
| n_components=None, |
| store_covariance=False, |
| tol=1e-4, |
| covariance_estimator=None, |
| ): |
| self.solver = solver |
| self.shrinkage = shrinkage |
| self.priors = priors |
| self.n_components = n_components |
| self.store_covariance = store_covariance |
| self.tol = tol |
| self.covariance_estimator = covariance_estimator |
|
|
    def _solve_lstsq(self, X, y, shrinkage, covariance_estimator):
        """Least squares solver.

        The least squares solver computes a straightforward solution of the
        optimal decision rule based directly on the discriminant functions.
        It can only be used for classification (with any covariance
        estimator), because estimation of eigenvectors is not performed.
        Therefore, dimensionality reduction with the transform is not
        supported.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_classes)
            Target values.

        shrinkage : 'auto', float or None
            Shrinkage parameter, possible values:
              - None: no shrinkage.
              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
              - float between 0 and 1: fixed shrinkage parameter.

            Shrinkage parameter is ignored if `covariance_estimator`
            is not None.

        covariance_estimator : estimator, default=None
            If not None, `covariance_estimator` is used to estimate
            the covariance matrices instead of relying on the empirical
            covariance estimator (with potential shrinkage).
            The object should have a fit method and a ``covariance_``
            attribute like the estimators in :mod:`sklearn.covariance`.
            If None, the shrinkage parameter drives the estimate.

            .. versionadded:: 0.24

        Notes
        -----
        This solver is based on [1]_, section 2.6.2, pp. 39-41.

        References
        ----------
        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification
           (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN
           0-471-05669-3.
        """
        self.means_ = _class_means(X, y)
        self.covariance_ = _class_cov(
            X, y, self.priors_, shrinkage, covariance_estimator
        )
        self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T
        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(
            self.priors_
        )
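
    # The closed form implemented above: for each class k the linear
    # discriminant is w_k = Sigma^{-1} mu_k with intercept
    # b_k = -0.5 * mu_k^T Sigma^{-1} mu_k + log(prior_k); `linalg.lstsq`
    # solves Sigma @ coef.T = means.T without explicitly inverting Sigma.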
|
|
    def _solve_eigen(self, X, y, shrinkage, covariance_estimator):
        """Eigenvalue solver.

        The eigenvalue solver computes the optimal solution of the Rayleigh
        coefficient (basically the ratio of between-class scatter to
        within-class scatter). This solver supports both classification and
        dimensionality reduction (with any covariance estimator).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        shrinkage : 'auto', float or None
            Shrinkage parameter, possible values:
              - None: no shrinkage.
              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
              - float between 0 and 1: fixed shrinkage constant.

            Shrinkage parameter is ignored if `covariance_estimator`
            is not None.

        covariance_estimator : estimator, default=None
            If not None, `covariance_estimator` is used to estimate
            the covariance matrices instead of relying on the empirical
            covariance estimator (with potential shrinkage).
            The object should have a fit method and a ``covariance_``
            attribute like the estimators in :mod:`sklearn.covariance`.
            If None, the shrinkage parameter drives the estimate.

            .. versionadded:: 0.24

        Notes
        -----
        This solver is based on [1]_, section 3.8.3, pp. 121-124.

        References
        ----------
        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification
           (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN
           0-471-05669-3.
        """
        self.means_ = _class_means(X, y)
        self.covariance_ = _class_cov(
            X, y, self.priors_, shrinkage, covariance_estimator
        )

        Sw = self.covariance_  # within-class scatter
        St = _cov(X, shrinkage, covariance_estimator)  # total scatter
        Sb = St - Sw  # between-class scatter

        evals, evecs = linalg.eigh(Sb, Sw)
        self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1][
            : self._max_components
        ]
        evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors descending

        self.scalings_ = evecs
        self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(
            self.priors_
        )
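
    # Sketch of the generalized eigenproblem solved above (illustrative):
    # `linalg.eigh(Sb, Sw)` yields directions w maximizing the Rayleigh
    # quotient (w^T Sb w) / (w^T Sw w). With a hypothetical toy input:
    #
    #     Sw = np.eye(2)
    #     Sb = np.array([[2.0, 0.0], [0.0, 0.5]])
    #     evals, evecs = linalg.eigh(Sb, Sw)  # evals -> array([0.5, 2.])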
|
|
    def _solve_svd(self, X, y):
        """SVD solver.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.
        """
        xp, is_array_api_compliant = get_namespace(X)

        if is_array_api_compliant:
            svd = xp.linalg.svd
        else:
            svd = scipy.linalg.svd

        n_samples, n_features = X.shape
        n_classes = self.classes_.shape[0]

        self.means_ = _class_means(X, y)
        if self.store_covariance:
            self.covariance_ = _class_cov(X, y, self.priors_)

        Xc = []
        for idx, group in enumerate(self.classes_):
            Xg = X[y == group]
            Xc.append(Xg - self.means_[idx, :])

        self.xbar_ = self.priors_ @ self.means_

        Xc = xp.concat(Xc, axis=0)

        # 1) Scale each feature by its within-class standard deviation.
        std = xp.std(Xc, axis=0)
        # Avoid division by zero for constant features.
        std[std == 0] = 1.0
        fac = xp.asarray(1.0 / (n_samples - n_classes), dtype=X.dtype)

        # 2) Within-class variance scaling, then SVD of the centered,
        # within-scaled data.
        X = xp.sqrt(fac) * (Xc / std)
        U, S, Vt = svd(X, full_matrices=False)

        rank = xp.sum(xp.astype(S > self.tol, xp.int32))
        # Scaling of the within-class covariance is V' 1/S.
        scalings = (Vt[:rank, :] / std).T / S[:rank]
        fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)

        # 3) Between-class variance scaling: scale the weighted class centers.
        X = (
            (xp.sqrt((n_samples * self.priors_) * fac)) * (self.means_ - self.xbar_).T
        ).T @ scalings
        # The centers live in a space of at most n_classes - 1 dimensions; use
        # an SVD to find the projection onto the space they span.
        _, S, Vt = svd(X, full_matrices=False)

        if self._max_components == 0:
            self.explained_variance_ratio_ = xp.empty((0,), dtype=S.dtype)
        else:
            self.explained_variance_ratio_ = (S**2 / xp.sum(S**2))[
                : self._max_components
            ]

        rank = xp.sum(xp.astype(S > self.tol * S[0], xp.int32))
        self.scalings_ = scalings @ Vt.T[:, :rank]
        coef = (self.means_ - self.xbar_) @ self.scalings_
        self.intercept_ = -0.5 * xp.sum(coef**2, axis=1) + xp.log(self.priors_)
        self.coef_ = coef @ self.scalings_.T
        self.intercept_ -= self.xbar_ @ self.coef_.T
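
    # Unlike 'lsqr' and 'eigen', the SVD route never forms a covariance
    # matrix: the data are sphered with a first SVD, then a second SVD of the
    # scaled class centers yields the discriminant axes. A consistency sketch
    # (illustrative; assumes a well-conditioned X):
    #
    #     lda_svd = LinearDiscriminantAnalysis(solver="svd").fit(X, y)
    #     lda_eig = LinearDiscriminantAnalysis(solver="eigen").fit(X, y)
    #     np.allclose(lda_svd.decision_function(X),
    #                 lda_eig.decision_function(X))  # -> True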
|
|
    @_fit_context(
        # covariance_estimator is not validated yet, so nested validation
        # must not be skipped.
        prefer_skip_nested_validation=False
    )
| def fit(self, X, y): |
| """Fit the Linear Discriminant Analysis model. |
| |
| .. versionchanged:: 0.19 |
            `store_covariance` and `tol` have been moved to the main constructor.
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Training data. |
| |
| y : array-like of shape (n_samples,) |
| Target values. |
| |
| Returns |
| ------- |
| self : object |
| Fitted estimator. |
| """ |
| xp, _ = get_namespace(X) |
|
|
| X, y = validate_data( |
| self, X, y, ensure_min_samples=2, dtype=[xp.float64, xp.float32] |
| ) |
| self.classes_ = unique_labels(y) |
| n_samples, _ = X.shape |
| n_classes = self.classes_.shape[0] |
|
|
| if n_samples == n_classes: |
| raise ValueError( |
| "The number of samples must be more than the number of classes." |
| ) |
|
|
| if self.priors is None: |
| _, cnts = xp.unique_counts(y) |
| self.priors_ = xp.astype(cnts, X.dtype) / float(y.shape[0]) |
| else: |
| self.priors_ = xp.asarray(self.priors, dtype=X.dtype) |
|
|
| if xp.any(self.priors_ < 0): |
| raise ValueError("priors must be non-negative") |
|
|
| if xp.abs(xp.sum(self.priors_) - 1.0) > 1e-5: |
| warnings.warn("The priors do not sum to 1. Renormalizing", UserWarning) |
| self.priors_ = self.priors_ / self.priors_.sum() |
|
|
| |
| |
        # Maximum number of components, no matter what n_components is
        # specified:
        max_components = min(n_classes - 1, X.shape[1])
|
|
| if self.n_components is None: |
| self._max_components = max_components |
| else: |
| if self.n_components > max_components: |
| raise ValueError( |
| "n_components cannot be larger than min(n_features, n_classes - 1)." |
| ) |
| self._max_components = self.n_components |
|
|
| if self.solver == "svd": |
| if self.shrinkage is not None: |
| raise NotImplementedError("shrinkage not supported with 'svd' solver.") |
| if self.covariance_estimator is not None: |
| raise ValueError( |
| "covariance estimator " |
| "is not supported " |
| "with svd solver. Try another solver" |
| ) |
| self._solve_svd(X, y) |
| elif self.solver == "lsqr": |
| self._solve_lstsq( |
| X, |
| y, |
| shrinkage=self.shrinkage, |
| covariance_estimator=self.covariance_estimator, |
| ) |
| elif self.solver == "eigen": |
| self._solve_eigen( |
| X, |
| y, |
| shrinkage=self.shrinkage, |
| covariance_estimator=self.covariance_estimator, |
| ) |
| if size(self.classes_) == 2: |
| coef_ = xp.asarray(self.coef_[1, :] - self.coef_[0, :], dtype=X.dtype) |
| self.coef_ = xp.reshape(coef_, (1, -1)) |
| intercept_ = xp.asarray( |
| self.intercept_[1] - self.intercept_[0], dtype=X.dtype |
| ) |
| self.intercept_ = xp.reshape(intercept_, (1,)) |
| self._n_features_out = self._max_components |
| return self |
|
|
| def transform(self, X): |
| """Project data to maximize class separation. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Input data. |
| |
| Returns |
| ------- |
| X_new : ndarray of shape (n_samples, n_components) or \ |
| (n_samples, min(rank, n_components)) |
| Transformed data. In the case of the 'svd' solver, the shape |
| is (n_samples, min(rank, n_components)). |
| """ |
| if self.solver == "lsqr": |
| raise NotImplementedError( |
| "transform not implemented for 'lsqr' solver (use 'svd' or 'eigen')." |
| ) |
| check_is_fitted(self) |
| xp, _ = get_namespace(X) |
| X = validate_data(self, X, reset=False) |
|
|
| if self.solver == "svd": |
| X_new = (X - self.xbar_) @ self.scalings_ |
| elif self.solver == "eigen": |
| X_new = X @ self.scalings_ |
|
|
| return X_new[:, : self._max_components] |
|
|
| def predict_proba(self, X): |
| """Estimate probability. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Input data. |
| |
| Returns |
| ------- |
| C : ndarray of shape (n_samples, n_classes) |
| Estimated probabilities. |
| """ |
| check_is_fitted(self) |
| xp, is_array_api_compliant = get_namespace(X) |
| decision = self.decision_function(X) |
| if size(self.classes_) == 2: |
| proba = _expit(decision, xp) |
| return xp.stack([1 - proba, proba], axis=1) |
| else: |
| return softmax(decision) |
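
    # In the binary case the decision value is a single log-odds score per
    # sample, so the positive-class probability above is `expit(decision)`
    # and the two columns stack `1 - proba` and `proba`.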
|
|
| def predict_log_proba(self, X): |
| """Estimate log probability. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Input data. |
| |
| Returns |
| ------- |
| C : ndarray of shape (n_samples, n_classes) |
| Estimated log probabilities. |
| """ |
| xp, _ = get_namespace(X) |
| prediction = self.predict_proba(X) |
|
|
        info = xp.finfo(prediction.dtype)
        if hasattr(info, "smallest_normal"):
            smallest_normal = info.smallest_normal
        else:
            # `smallest_normal` was introduced in NumPy 1.22; fall back to
            # the older alias `tiny` otherwise.
            smallest_normal = info.tiny

        prediction[prediction == 0.0] += smallest_normal
        return xp.log(prediction)
|
|
| def decision_function(self, X): |
| """Apply decision function to an array of samples. |
| |
| The decision function is equal (up to a constant factor) to the |
| log-posterior of the model, i.e. `log p(y = k | x)`. In a binary |
| classification setting this instead corresponds to the difference |
| `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Array of samples (test vectors). |
| |
| Returns |
| ------- |
| y_scores : ndarray of shape (n_samples,) or (n_samples, n_classes) |
| Decision function values related to each class, per sample. |
| In the two-class case, the shape is `(n_samples,)`, giving the |
| log likelihood ratio of the positive class. |
| """ |
| |
| return super().decision_function(X) |
|
|
| def __sklearn_tags__(self): |
| tags = super().__sklearn_tags__() |
| tags.array_api_support = True |
| return tags |
|
|
|
|
| class QuadraticDiscriminantAnalysis( |
| DiscriminantAnalysisPredictionMixin, ClassifierMixin, BaseEstimator |
| ): |
| """Quadratic Discriminant Analysis. |
| |
| A classifier with a quadratic decision boundary, generated |
| by fitting class conditional densities to the data |
| and using Bayes' rule. |
| |
| The model fits a Gaussian density to each class. |
| |
| .. versionadded:: 0.17 |
| |
| For a comparison between |
| :class:`~sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis` |
| and :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`, see |
| :ref:`sphx_glr_auto_examples_classification_plot_lda_qda.py`. |
| |
| Read more in the :ref:`User Guide <lda_qda>`. |
| |
| Parameters |
| ---------- |
| priors : array-like of shape (n_classes,), default=None |
| Class priors. By default, the class proportions are inferred from the |
| training data. |
| |
    reg_param : float, default=0.0
        Regularizes the per-class covariance estimates by transforming S2 as
        ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,
        where S2 corresponds to the `scalings_` attribute of a given class.
| |
| store_covariance : bool, default=False |
| If True, the class covariance matrices are explicitly computed and |
| stored in the `self.covariance_` attribute. |
| |
| .. versionadded:: 0.17 |
| |
| tol : float, default=1.0e-4 |
| Absolute threshold for the covariance matrix to be considered rank |
| deficient after applying some regularization (see `reg_param`) to each |
| `Sk` where `Sk` represents covariance matrix for k-th class. This |
| parameter does not affect the predictions. It controls when a warning |
| is raised if the covariance matrix is not full rank. |
| |
| .. versionadded:: 0.17 |
| |
| Attributes |
| ---------- |
| covariance_ : list of len n_classes of ndarray \ |
| of shape (n_features, n_features) |
| For each class, gives the covariance matrix estimated using the |
| samples of that class. The estimations are unbiased. Only present if |
| `store_covariance` is True. |
| |
| means_ : array-like of shape (n_classes, n_features) |
| Class-wise means. |
| |
| priors_ : array-like of shape (n_classes,) |
| Class priors (sum to 1). |
| |
    rotations_ : list of len n_classes of ndarray of shape (n_features, n_k)
        For each class k, an array of shape (n_features, n_k), where
        ``n_k = min(n_features, number of elements in class k)``.
        It is the rotation of the Gaussian distribution, i.e. its
        principal axes. It corresponds to `V`, the matrix of eigenvectors
        coming from the SVD of `Xk = U S Vt`, where `Xk` is the centered
        matrix of samples from class k.
| |
| scalings_ : list of len n_classes of ndarray of shape (n_k,) |
| For each class, contains the scaling of |
| the Gaussian distributions along its principal axes, i.e. the |
| variance in the rotated coordinate system. It corresponds to `S^2 / |
| (n_samples - 1)`, where `S` is the diagonal matrix of singular values |
| from the SVD of `Xk`, where `Xk` is the centered matrix of samples |
| from class k. |
| |
| classes_ : ndarray of shape (n_classes,) |
| Unique class labels. |
| |
| n_features_in_ : int |
| Number of features seen during :term:`fit`. |
| |
| .. versionadded:: 0.24 |
| |
| feature_names_in_ : ndarray of shape (`n_features_in_`,) |
| Names of features seen during :term:`fit`. Defined only when `X` |
| has feature names that are all strings. |
| |
| .. versionadded:: 1.0 |
| |
| See Also |
| -------- |
| LinearDiscriminantAnalysis : Linear Discriminant Analysis. |
| |
| Examples |
| -------- |
| >>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis |
| >>> import numpy as np |
| >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) |
| >>> y = np.array([1, 1, 1, 2, 2, 2]) |
| >>> clf = QuadraticDiscriminantAnalysis() |
| >>> clf.fit(X, y) |
| QuadraticDiscriminantAnalysis() |
| >>> print(clf.predict([[-0.8, -1]])) |
| [1] |
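    >>> # Posterior probabilities are available as well (shape shown here):
    >>> clf.predict_proba([[-0.8, -1]]).shape
    (1, 2)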
| """ |
|
|
| _parameter_constraints: dict = { |
| "priors": ["array-like", None], |
| "reg_param": [Interval(Real, 0, 1, closed="both")], |
| "store_covariance": ["boolean"], |
| "tol": [Interval(Real, 0, None, closed="left")], |
| } |
|
|
| def __init__( |
| self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4 |
| ): |
| self.priors = priors |
| self.reg_param = reg_param |
| self.store_covariance = store_covariance |
| self.tol = tol |
|
|
| @_fit_context(prefer_skip_nested_validation=True) |
| def fit(self, X, y): |
| """Fit the model according to the given training data and parameters. |
| |
| .. versionchanged:: 0.19 |
| ``store_covariances`` has been moved to main constructor as |
| ``store_covariance``. |
| |
| .. versionchanged:: 0.19 |
| ``tol`` has been moved to main constructor. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Training vector, where `n_samples` is the number of samples and |
| `n_features` is the number of features. |
| |
| y : array-like of shape (n_samples,) |
| Target values (integers). |
| |
| Returns |
| ------- |
| self : object |
| Fitted estimator. |
| """ |
| X, y = validate_data(self, X, y) |
| check_classification_targets(y) |
| self.classes_, y = np.unique(y, return_inverse=True) |
| n_samples, n_features = X.shape |
| n_classes = len(self.classes_) |
| if n_classes < 2: |
| raise ValueError( |
| "The number of classes has to be greater than one; got %d class" |
| % (n_classes) |
| ) |
| if self.priors is None: |
| self.priors_ = np.bincount(y) / float(n_samples) |
| else: |
| self.priors_ = np.array(self.priors) |
|
|
| cov = None |
| store_covariance = self.store_covariance |
| if store_covariance: |
| cov = [] |
| means = [] |
| scalings = [] |
| rotations = [] |
| for ind in range(n_classes): |
| Xg = X[y == ind, :] |
| meang = Xg.mean(0) |
| means.append(meang) |
| if len(Xg) == 1: |
| raise ValueError( |
| "y has only 1 sample in class %s, covariance is ill defined." |
| % str(self.classes_[ind]) |
| ) |
            Xgc = Xg - meang
            # Xgc = U * S * Vt
            _, S, Vt = np.linalg.svd(Xgc, full_matrices=False)
            S2 = (S**2) / (len(Xg) - 1)
            S2 = ((1 - self.reg_param) * S2) + self.reg_param
            rank = np.sum(S2 > self.tol)
            if rank < n_features:
                warnings.warn(
                    f"The covariance matrix of class {ind} is not full rank. "
                    "Increasing the value of parameter `reg_param` might help"
                    " reducing the collinearity.",
                    linalg.LinAlgWarning,
                )
            if self.store_covariance or store_covariance:
                # cov = V * (S^2 / (n-1)) * V.T
                cov.append(np.dot(S2 * Vt.T, Vt))
            scalings.append(S2)
            rotations.append(Vt.T)
| if self.store_covariance or store_covariance: |
| self.covariance_ = cov |
| self.means_ = np.asarray(means) |
| self.scalings_ = scalings |
| self.rotations_ = rotations |
| return self |
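
    # Each stored triple reconstructs that class's regularized covariance
    # (when `store_covariance` is True):
    # cov_k = rotations_[k] @ np.diag(scalings_[k]) @ rotations_[k].T, which
    # is exactly the `S2 * Vt.T @ Vt` product appended to `covariance_` above.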
|
|
    def _decision_function(self, X):
        # Return the log posterior up to an additive constant; see eq. (4.12),
        # p. 110 of "The Elements of Statistical Learning".
        check_is_fitted(self)

        X = validate_data(self, X, reset=False)
| norm2 = [] |
| for i in range(len(self.classes_)): |
| R = self.rotations_[i] |
| S = self.scalings_[i] |
| Xm = X - self.means_[i] |
| X2 = np.dot(Xm, R * (S ** (-0.5))) |
| norm2.append(np.sum(X2**2, axis=1)) |
| norm2 = np.array(norm2).T |
| u = np.asarray([np.sum(np.log(s)) for s in self.scalings_]) |
| return -0.5 * (norm2 + u) + np.log(self.priors_) |
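
    # The loop above evaluates, for each class k, the Gaussian log-posterior
    # up to a shared constant:
    #
    #     -0.5 * ((x - mu_k)^T Sigma_k^{-1} (x - mu_k) + log|Sigma_k|)
    #     + log(prior_k)
    #
    # with Sigma_k^{-1} factored as R_k diag(1 / S2_k) R_k^T, so the
    # Mahalanobis term reduces to the squared row norms of
    # (x - mu_k) @ (R_k * S2_k**-0.5).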
|
|
| def decision_function(self, X): |
| """Apply decision function to an array of samples. |
| |
| The decision function is equal (up to a constant factor) to the |
| log-posterior of the model, i.e. `log p(y = k | x)`. In a binary |
| classification setting this instead corresponds to the difference |
| `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Array of samples (test vectors). |
| |
| Returns |
| ------- |
| C : ndarray of shape (n_samples,) or (n_samples, n_classes) |
| Decision function values related to each class, per sample. |
| In the two-class case, the shape is `(n_samples,)`, giving the |
| log likelihood ratio of the positive class. |
| """ |
| return super().decision_function(X) |
|
|
| def predict(self, X): |
| """Perform classification on an array of test vectors X. |
| |
| The predicted class C for each sample in X is returned. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Vector to be scored, where `n_samples` is the number of samples and |
| `n_features` is the number of features. |
| |
| Returns |
| ------- |
| C : ndarray of shape (n_samples,) |
            Predicted class label for each sample.
| """ |
| return super().predict(X) |
|
|
| def predict_proba(self, X): |
| """Return posterior probabilities of classification. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Array of samples/test vectors. |
| |
| Returns |
| ------- |
| C : ndarray of shape (n_samples, n_classes) |
| Posterior probabilities of classification per class. |
| """ |
| |
| |
| return super().predict_proba(X) |
|
|
| def predict_log_proba(self, X): |
| """Return log of posterior probabilities of classification. |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| Array of samples/test vectors. |
| |
| Returns |
| ------- |
| C : ndarray of shape (n_samples, n_classes) |
| Posterior log-probabilities of classification per class. |
| """ |
| |
| return super().predict_log_proba(X) |
|
|