spam-classifier / venv /lib /python3.11 /site-packages /sklearn /tests /test_discriminant_analysis.py

Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified 8 months ago

22.9 kB

	import warnings

	import numpy as np
	import pytest
	from scipy import linalg

	from sklearn.cluster import KMeans
	from sklearn.covariance import LedoitWolf, ShrunkCovariance, ledoit_wolf
	from sklearn.datasets import make_blobs
	from sklearn.discriminant_analysis import (
	LinearDiscriminantAnalysis,
	QuadraticDiscriminantAnalysis,
	_cov,
	)
	from sklearn.preprocessing import StandardScaler
	from sklearn.utils import check_random_state
	from sklearn.utils._testing import (
	_convert_container,
	assert_allclose,
	assert_almost_equal,
	assert_array_almost_equal,
	assert_array_equal,
	)
	from sklearn.utils.fixes import _IS_WASM

	# Data is just 6 separable points in the plane
	X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype="f")
	y = np.array([1, 1, 1, 2, 2, 2])
	y3 = np.array([1, 1, 2, 2, 3, 3])

	# Degenerate data with only one feature (still should be separable)
	X1 = np.array(
	[[-2], [-1], [-1], [1], [1], [2]],
	dtype="f",
	)

	# Data is just 9 separable points in the plane
	X6 = np.array(
	[[0, 0], [-2, -2], [-2, -1], [-1, -1], [-1, -2], [1, 3], [1, 2], [2, 1], [2, 2]]
	)
	y6 = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2])
	y7 = np.array([1, 2, 3, 2, 3, 1, 2, 3, 1])

	# Degenerate data with 1 feature (still should be separable)
	X7 = np.array([[-3], [-2], [-1], [-1], [0], [1], [1], [2], [3]])

	# Data that has zero variance in one dimension and needs regularization
	X2 = np.array(
	[[-3, 0], [-2, 0], [-1, 0], [-1, 0], [0, 0], [1, 0], [1, 0], [2, 0], [3, 0]]
	)

	# One element class
	y4 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 2])

	# Data with less samples in a class than n_features
	X5 = np.c_[np.arange(8), np.zeros((8, 3))]
	y5 = np.array([0, 0, 0, 0, 0, 1, 1, 1])

	solver_shrinkage = [
	("svd", None),
	("lsqr", None),
	("eigen", None),
	("lsqr", "auto"),
	("lsqr", 0),
	("lsqr", 0.43),
	("eigen", "auto"),
	("eigen", 0),
	("eigen", 0.43),
	]


	def test_lda_predict():
	# Test LDA classification.
	# This checks that LDA implements fit and predict and returns correct
	# values for simple toy data.
	for test_case in solver_shrinkage:
	solver, shrinkage = test_case
	clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
	y_pred = clf.fit(X, y).predict(X)
	assert_array_equal(y_pred, y, "solver %s" % solver)

	# Assert that it works with 1D data
	y_pred1 = clf.fit(X1, y).predict(X1)
	assert_array_equal(y_pred1, y, "solver %s" % solver)

	# Test probability estimates
	y_proba_pred1 = clf.predict_proba(X1)
	assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y, "solver %s" % solver)
	y_log_proba_pred1 = clf.predict_log_proba(X1)
	assert_allclose(
	np.exp(y_log_proba_pred1),
	y_proba_pred1,
	rtol=1e-6,
	atol=1e-6,
	err_msg="solver %s" % solver,
	)

	# Primarily test for commit 2f34950 -- "reuse" of priors
	y_pred3 = clf.fit(X, y3).predict(X)
	# LDA shouldn't be able to separate those
	assert np.any(y_pred3 != y3), "solver %s" % solver

	clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto")
	with pytest.raises(NotImplementedError):
	clf.fit(X, y)

	clf = LinearDiscriminantAnalysis(
	solver="lsqr", shrinkage=0.1, covariance_estimator=ShrunkCovariance()
	)
	with pytest.raises(
	ValueError,
	match=(
	"covariance_estimator and shrinkage "
	"parameters are not None. "
	"Only one of the two can be set."
	),
	):
	clf.fit(X, y)

	# test bad solver with covariance_estimator
	clf = LinearDiscriminantAnalysis(solver="svd", covariance_estimator=LedoitWolf())
	with pytest.raises(
	ValueError, match="covariance estimator is not supported with svd"
	):
	clf.fit(X, y)

	# test bad covariance estimator
	clf = LinearDiscriminantAnalysis(
	solver="lsqr", covariance_estimator=KMeans(n_clusters=2, n_init="auto")
	)
	with pytest.raises(ValueError):
	clf.fit(X, y)


	@pytest.mark.parametrize("n_classes", [2, 3])
	@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
	def test_lda_predict_proba(solver, n_classes):
	def generate_dataset(n_samples, centers, covariances, random_state=None):
	"""Generate a multivariate normal data given some centers and
	covariances"""
	rng = check_random_state(random_state)
	X = np.vstack(
	[
	rng.multivariate_normal(mean, cov, size=n_samples // len(centers))
	for mean, cov in zip(centers, covariances)
	]
	)
	y = np.hstack(
	[[clazz] * (n_samples // len(centers)) for clazz in range(len(centers))]
	)
	return X, y

	blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes]
	blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers))
	X, y = generate_dataset(
	n_samples=90000, centers=blob_centers, covariances=blob_stds, random_state=42
	)
	lda = LinearDiscriminantAnalysis(
	solver=solver, store_covariance=True, shrinkage=None
	).fit(X, y)
	# check that the empirical means and covariances are close enough to the
	# one used to generate the data
	assert_allclose(lda.means_, blob_centers, atol=1e-1)
	assert_allclose(lda.covariance_, blob_stds[0], atol=1)

	# implement the method to compute the probability given in The Elements
	# of Statistical Learning (cf. p.127, Sect. 4.4.5 "Logistic Regression
	# or LDA?")
	precision = linalg.inv(blob_stds[0])
	alpha_k = []
	alpha_k_0 = []
	for clazz in range(len(blob_centers) - 1):
	alpha_k.append(
	np.dot(precision, (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis])
	)
	alpha_k_0.append(
	np.dot(
	-0.5 * (blob_centers[clazz] + blob_centers[-1])[np.newaxis, :],
	alpha_k[-1],
	)
	)

	sample = np.array([[-22, 22]])

	def discriminant_func(sample, coef, intercept, clazz):
	return np.exp(intercept[clazz] + np.dot(sample, coef[clazz])).item()

	prob = np.array(
	[
	float(
	discriminant_func(sample, alpha_k, alpha_k_0, clazz)
	/ (
	1
	+ sum(
	[
	discriminant_func(sample, alpha_k, alpha_k_0, clazz)
	for clazz in range(n_classes - 1)
	]
	)
	)
	)
	for clazz in range(n_classes - 1)
	]
	)

	prob_ref = 1 - np.sum(prob)

	# check the consistency of the computed probability
	# all probabilities should sum to one
	prob_ref_2 = float(
	1
	/ (
	1
	+ sum(
	[
	discriminant_func(sample, alpha_k, alpha_k_0, clazz)
	for clazz in range(n_classes - 1)
	]
	)
	)
	)

	assert prob_ref == pytest.approx(prob_ref_2)
	# check that the probability of LDA are close to the theoretical
	# probabilities
	assert_allclose(
	lda.predict_proba(sample), np.hstack([prob, prob_ref])[np.newaxis], atol=1e-2
	)


	def test_lda_priors():
	# Test priors (negative priors)
	priors = np.array([0.5, -0.5])
	clf = LinearDiscriminantAnalysis(priors=priors)
	msg = "priors must be non-negative"

	with pytest.raises(ValueError, match=msg):
	clf.fit(X, y)

	# Test that priors passed as a list are correctly handled (run to see if
	# failure)
	clf = LinearDiscriminantAnalysis(priors=[0.5, 0.5])
	clf.fit(X, y)

	# Test that priors always sum to 1
	priors = np.array([0.5, 0.6])
	prior_norm = np.array([0.45, 0.55])
	clf = LinearDiscriminantAnalysis(priors=priors)

	with pytest.warns(UserWarning):
	clf.fit(X, y)

	assert_array_almost_equal(clf.priors_, prior_norm, 2)


	def test_lda_coefs():
	# Test if the coefficients of the solvers are approximately the same.
	n_features = 2
	n_classes = 2
	n_samples = 1000
	X, y = make_blobs(
	n_samples=n_samples, n_features=n_features, centers=n_classes, random_state=11
	)

	clf_lda_svd = LinearDiscriminantAnalysis(solver="svd")
	clf_lda_lsqr = LinearDiscriminantAnalysis(solver="lsqr")
	clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen")

	clf_lda_svd.fit(X, y)
	clf_lda_lsqr.fit(X, y)
	clf_lda_eigen.fit(X, y)

	assert_array_almost_equal(clf_lda_svd.coef_, clf_lda_lsqr.coef_, 1)
	assert_array_almost_equal(clf_lda_svd.coef_, clf_lda_eigen.coef_, 1)
	assert_array_almost_equal(clf_lda_eigen.coef_, clf_lda_lsqr.coef_, 1)


	def test_lda_transform():
	# Test LDA transform.
	clf = LinearDiscriminantAnalysis(solver="svd", n_components=1)
	X_transformed = clf.fit(X, y).transform(X)
	assert X_transformed.shape[1] == 1
	clf = LinearDiscriminantAnalysis(solver="eigen", n_components=1)
	X_transformed = clf.fit(X, y).transform(X)
	assert X_transformed.shape[1] == 1

	clf = LinearDiscriminantAnalysis(solver="lsqr", n_components=1)
	clf.fit(X, y)
	msg = "transform not implemented for 'lsqr'"

	with pytest.raises(NotImplementedError, match=msg):
	clf.transform(X)


	def test_lda_explained_variance_ratio():
	# Test if the sum of the normalized eigen vectors values equals 1,
	# Also tests whether the explained_variance_ratio_ formed by the
	# eigen solver is the same as the explained_variance_ratio_ formed
	# by the svd solver

	state = np.random.RandomState(0)
	X = state.normal(loc=0, scale=100, size=(40, 20))
	y = state.randint(0, 3, size=(40,))

	clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen")
	clf_lda_eigen.fit(X, y)
	assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3)
	assert clf_lda_eigen.explained_variance_ratio_.shape == (
	2,
	), "Unexpected length for explained_variance_ratio_"

	clf_lda_svd = LinearDiscriminantAnalysis(solver="svd")
	clf_lda_svd.fit(X, y)
	assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3)
	assert clf_lda_svd.explained_variance_ratio_.shape == (
	2,
	), "Unexpected length for explained_variance_ratio_"

	assert_array_almost_equal(
	clf_lda_svd.explained_variance_ratio_, clf_lda_eigen.explained_variance_ratio_
	)


	def test_lda_orthogonality():
	# arrange four classes with their means in a kite-shaped pattern
	# the longer distance should be transformed to the first component, and
	# the shorter distance to the second component.
	means = np.array([[0, 0, -1], [0, 2, 0], [0, -2, 0], [0, 0, 5]])

	# We construct perfectly symmetric distributions, so the LDA can estimate
	# precise means.
	scatter = np.array(
	[
	[0.1, 0, 0],
	[-0.1, 0, 0],
	[0, 0.1, 0],
	[0, -0.1, 0],
	[0, 0, 0.1],
	[0, 0, -0.1],
	]
	)

	X = (means[:, np.newaxis, :] + scatter[np.newaxis, :, :]).reshape((-1, 3))
	y = np.repeat(np.arange(means.shape[0]), scatter.shape[0])

	# Fit LDA and transform the means
	clf = LinearDiscriminantAnalysis(solver="svd").fit(X, y)
	means_transformed = clf.transform(means)

	d1 = means_transformed[3] - means_transformed[0]
	d2 = means_transformed[2] - means_transformed[1]
	d1 /= np.sqrt(np.sum(d1**2))
	d2 /= np.sqrt(np.sum(d2**2))

	# the transformed within-class covariance should be the identity matrix
	assert_almost_equal(np.cov(clf.transform(scatter).T), np.eye(2))

	# the means of classes 0 and 3 should lie on the first component
	assert_almost_equal(np.abs(np.dot(d1[:2], [1, 0])), 1.0)

	# the means of classes 1 and 2 should lie on the second component
	assert_almost_equal(np.abs(np.dot(d2[:2], [0, 1])), 1.0)


	def test_lda_scaling():
	# Test if classification works correctly with differently scaled features.
	n = 100
	rng = np.random.RandomState(1234)
	# use uniform distribution of features to make sure there is absolutely no
	# overlap between classes.
	x1 = rng.uniform(-1, 1, (n, 3)) + [-10, 0, 0]
	x2 = rng.uniform(-1, 1, (n, 3)) + [10, 0, 0]
	x = np.vstack((x1, x2)) * [1, 100, 10000]
	y = [-1] * n + [1] * n

	for solver in ("svd", "lsqr", "eigen"):
	clf = LinearDiscriminantAnalysis(solver=solver)
	# should be able to separate the data perfectly
	assert clf.fit(x, y).score(x, y) == 1.0, "using covariance: %s" % solver


	def test_lda_store_covariance():
	# Test for solver 'lsqr' and 'eigen'
	# 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers
	for solver in ("lsqr", "eigen"):
	clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6)
	assert hasattr(clf, "covariance_")

	# Test the actual attribute:
	clf = LinearDiscriminantAnalysis(solver=solver, store_covariance=True).fit(
	X6, y6
	)
	assert hasattr(clf, "covariance_")

	assert_array_almost_equal(
	clf.covariance_, np.array([[0.422222, 0.088889], [0.088889, 0.533333]])
	)

	# Test for SVD solver, the default is to not set the covariances_ attribute
	clf = LinearDiscriminantAnalysis(solver="svd").fit(X6, y6)
	assert not hasattr(clf, "covariance_")

	# Test the actual attribute:
	clf = LinearDiscriminantAnalysis(solver=solver, store_covariance=True).fit(X6, y6)
	assert hasattr(clf, "covariance_")

	assert_array_almost_equal(
	clf.covariance_, np.array([[0.422222, 0.088889], [0.088889, 0.533333]])
	)


	@pytest.mark.parametrize("seed", range(10))
	def test_lda_shrinkage(seed):
	# Test that shrunk covariance estimator and shrinkage parameter behave the
	# same
	rng = np.random.RandomState(seed)
	X = rng.rand(100, 10)
	y = rng.randint(3, size=(100))
	c1 = LinearDiscriminantAnalysis(store_covariance=True, shrinkage=0.5, solver="lsqr")
	c2 = LinearDiscriminantAnalysis(
	store_covariance=True,
	covariance_estimator=ShrunkCovariance(shrinkage=0.5),
	solver="lsqr",
	)
	c1.fit(X, y)
	c2.fit(X, y)
	assert_allclose(c1.means_, c2.means_)
	assert_allclose(c1.covariance_, c2.covariance_)


	def test_lda_ledoitwolf():
	# When shrinkage="auto" current implementation uses ledoitwolf estimation
	# of covariance after standardizing the data. This checks that it is indeed
	# the case
	class StandardizedLedoitWolf:
	def fit(self, X):
	sc = StandardScaler() # standardize features
	X_sc = sc.fit_transform(X)
	s = ledoit_wolf(X_sc)[0]
	# rescale
	s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
	self.covariance_ = s

	rng = np.random.RandomState(0)
	X = rng.rand(100, 10)
	y = rng.randint(3, size=(100,))
	c1 = LinearDiscriminantAnalysis(
	store_covariance=True, shrinkage="auto", solver="lsqr"
	)
	c2 = LinearDiscriminantAnalysis(
	store_covariance=True,
	covariance_estimator=StandardizedLedoitWolf(),
	solver="lsqr",
	)
	c1.fit(X, y)
	c2.fit(X, y)
	assert_allclose(c1.means_, c2.means_)
	assert_allclose(c1.covariance_, c2.covariance_)


	@pytest.mark.parametrize("n_features", [3, 5])
	@pytest.mark.parametrize("n_classes", [5, 3])
	def test_lda_dimension_warning(n_classes, n_features):
	rng = check_random_state(0)
	n_samples = 10
	X = rng.randn(n_samples, n_features)
	# we create n_classes labels by repeating and truncating a
	# range(n_classes) until n_samples
	y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
	max_components = min(n_features, n_classes - 1)

	for n_components in [max_components - 1, None, max_components]:
	# if n_components <= min(n_classes - 1, n_features), no warning
	lda = LinearDiscriminantAnalysis(n_components=n_components)
	lda.fit(X, y)

	for n_components in [max_components + 1, max(n_features, n_classes - 1) + 1]:
	# if n_components > min(n_classes - 1, n_features), raise error.
	# We test one unit higher than max_components, and then something
	# larger than both n_features and n_classes - 1 to ensure the test
	# works for any value of n_component
	lda = LinearDiscriminantAnalysis(n_components=n_components)
	msg = "n_components cannot be larger than "
	with pytest.raises(ValueError, match=msg):
	lda.fit(X, y)


	@pytest.mark.parametrize(
	"data_type, expected_type",
	[
	(np.float32, np.float32),
	(np.float64, np.float64),
	(np.int32, np.float64),
	(np.int64, np.float64),
	],
	)
	def test_lda_dtype_match(data_type, expected_type):
	for solver, shrinkage in solver_shrinkage:
	clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
	clf.fit(X.astype(data_type), y.astype(data_type))
	assert clf.coef_.dtype == expected_type


	def test_lda_numeric_consistency_float32_float64():
	for solver, shrinkage in solver_shrinkage:
	clf_32 = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
	clf_32.fit(X.astype(np.float32), y.astype(np.float32))
	clf_64 = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
	clf_64.fit(X.astype(np.float64), y.astype(np.float64))

	# Check value consistency between types
	rtol = 1e-6
	assert_allclose(clf_32.coef_, clf_64.coef_, rtol=rtol)


	def test_qda():
	# QDA classification.
	# This checks that QDA implements fit and predict and returns
	# correct values for a simple toy dataset.
	clf = QuadraticDiscriminantAnalysis()
	y_pred = clf.fit(X6, y6).predict(X6)
	assert_array_equal(y_pred, y6)

	# Assure that it works with 1D data
	y_pred1 = clf.fit(X7, y6).predict(X7)
	assert_array_equal(y_pred1, y6)

	# Test probas estimates
	y_proba_pred1 = clf.predict_proba(X7)
	assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y6)
	y_log_proba_pred1 = clf.predict_log_proba(X7)
	assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1, 8)

	y_pred3 = clf.fit(X6, y7).predict(X6)
	# QDA shouldn't be able to separate those
	assert np.any(y_pred3 != y7)

	# Classes should have at least 2 elements
	with pytest.raises(ValueError):
	clf.fit(X6, y4)


	def test_qda_priors():
	clf = QuadraticDiscriminantAnalysis()
	y_pred = clf.fit(X6, y6).predict(X6)
	n_pos = np.sum(y_pred == 2)

	neg = 1e-10
	clf = QuadraticDiscriminantAnalysis(priors=np.array([neg, 1 - neg]))
	y_pred = clf.fit(X6, y6).predict(X6)
	n_pos2 = np.sum(y_pred == 2)

	assert n_pos2 > n_pos


	@pytest.mark.parametrize("priors_type", ["list", "tuple", "array"])
	def test_qda_prior_type(priors_type):
	"""Check that priors accept array-like."""
	priors = [0.5, 0.5]
	clf = QuadraticDiscriminantAnalysis(
	priors=_convert_container([0.5, 0.5], priors_type)
	).fit(X6, y6)
	assert isinstance(clf.priors_, np.ndarray)
	assert_array_equal(clf.priors_, priors)


	def test_qda_prior_copy():
	"""Check that altering `priors` without `fit` doesn't change `priors_`"""
	priors = np.array([0.5, 0.5])
	qda = QuadraticDiscriminantAnalysis(priors=priors).fit(X, y)

	# we expect the following
	assert_array_equal(qda.priors_, qda.priors)

	# altering `priors` without `fit` should not change `priors_`
	priors[0] = 0.2
	assert qda.priors_[0] != qda.priors[0]


	def test_qda_store_covariance():
	# The default is to not set the covariances_ attribute
	clf = QuadraticDiscriminantAnalysis().fit(X6, y6)
	assert not hasattr(clf, "covariance_")

	# Test the actual attribute:
	clf = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X6, y6)
	assert hasattr(clf, "covariance_")

	assert_array_almost_equal(clf.covariance_[0], np.array([[0.7, 0.45], [0.45, 0.7]]))

	assert_array_almost_equal(
	clf.covariance_[1],
	np.array([[0.33333333, -0.33333333], [-0.33333333, 0.66666667]]),
	)


	@pytest.mark.xfail(
	_IS_WASM,
	reason=(
	"no floating point exceptions, see"
	" https://github.com/numpy/numpy/pull/21895#issuecomment-1311525881"
	),
	)
	def test_qda_regularization():
	# The default is reg_param=0. and will cause issues when there is a
	# constant variable.

	# Fitting on data with constant variable without regularization
	# triggers a LinAlgError.
	msg = r"The covariance matrix of class .+ is not full rank"
	clf = QuadraticDiscriminantAnalysis()
	with pytest.warns(linalg.LinAlgWarning, match=msg):
	y_pred = clf.fit(X2, y6)

	y_pred = clf.predict(X2)
	assert np.any(y_pred != y6)

	# Adding a little regularization fixes the fit time error.
	clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
	with warnings.catch_warnings():
	warnings.simplefilter("error")
	clf.fit(X2, y6)
	y_pred = clf.predict(X2)
	assert_array_equal(y_pred, y6)

	# LinAlgWarning should also be there for the n_samples_in_a_class <
	# n_features case.
	clf = QuadraticDiscriminantAnalysis()
	with pytest.warns(linalg.LinAlgWarning, match=msg):
	clf.fit(X5, y5)

	# The error will persist even with regularization
	clf = QuadraticDiscriminantAnalysis(reg_param=0.3)
	with pytest.warns(linalg.LinAlgWarning, match=msg):
	clf.fit(X5, y5)


	def test_covariance():
	x, y = make_blobs(n_samples=100, n_features=5, centers=1, random_state=42)

	# make features correlated
	x = np.dot(x, np.arange(x.shape[1] ** 2).reshape(x.shape[1], x.shape[1]))

	c_e = _cov(x, "empirical")
	assert_almost_equal(c_e, c_e.T)

	c_s = _cov(x, "auto")
	assert_almost_equal(c_s, c_s.T)


	@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
	def test_raises_value_error_on_same_number_of_classes_and_samples(solver):
	"""
	Tests that if the number of samples equals the number
	of classes, a ValueError is raised.
	"""
	X = np.array([[0.5, 0.6], [0.6, 0.5]])
	y = np.array(["a", "b"])
	clf = LinearDiscriminantAnalysis(solver=solver)
	with pytest.raises(ValueError, match="The number of samples must be more"):
	clf.fit(X, y)


	def test_get_feature_names_out():
	"""Check get_feature_names_out uses class name as prefix."""

	est = LinearDiscriminantAnalysis().fit(X, y)
	names_out = est.get_feature_names_out()

	class_name_lower = "LinearDiscriminantAnalysis".lower()
	expected_names_out = np.array(
	[
	f"{class_name_lower}{i}"
	for i in range(est.explained_variance_ratio_.shape[0])
	],
	dtype=object,
	)
	assert_array_equal(names_out, expected_names_out)