Prompt48 committed on
Commit
2e9dfc4
·
verified ·
1 Parent(s): 0b7a138

Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\sklearn\decomposition\tests\test_nmf.py with huggingface_hub

Browse files
edit//Qwen3-TTS-test//.venv//Lib//site-packages//sklearn//decomposition//tests//test_nmf.py ADDED
@@ -0,0 +1,1010 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import sys
3
+ from io import StringIO
4
+
5
+ import numpy as np
6
+ import pytest
7
+ from scipy import linalg
8
+
9
+ from sklearn.base import clone
10
+ from sklearn.decomposition import NMF, MiniBatchNMF, non_negative_factorization
11
+ from sklearn.decomposition import _nmf as nmf # For testing internals
12
+ from sklearn.exceptions import ConvergenceWarning
13
+ from sklearn.utils._testing import (
14
+ assert_allclose,
15
+ assert_almost_equal,
16
+ assert_array_almost_equal,
17
+ assert_array_equal,
18
+ )
19
+ from sklearn.utils.extmath import squared_norm
20
+ from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
21
+
22
+
23
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
def test_convergence_warning(Estimator, solver):
    """Fitting with ``max_iter=1`` must raise a ConvergenceWarning."""
    expected_message = (
        "Maximum number of iterations 1 reached. Increase it to improve convergence."
    )
    data = np.ones((2, 2))
    with pytest.warns(ConvergenceWarning, match=expected_message):
        Estimator(max_iter=1, n_components="auto", **solver).fit(data)
34
+
35
+
36
def test_initialize_nn_output():
    """Each initialization scheme must return W and H without negative entries."""
    random_gen = np.random.mtrand.RandomState(42)
    samples = np.abs(random_gen.randn(10, 10))
    for scheme in ("random", "nndsvd", "nndsvda", "nndsvdar"):
        W, H = nmf._initialize_nmf(samples, 10, init=scheme, random_state=0)
        assert not (W < 0).any()
        assert not (H < 0).any()
43
+
44
+
45
@pytest.mark.filterwarnings(
    r"ignore:The multiplicative update \('mu'\) solver cannot update zeros present in"
    r" the initialization",
)
def test_parameter_checking():
    """Check invalid parameter values not already covered by the common tests.

    Covers an unsupported solver/beta_loss combination, negative input data
    (in ``fit``, ``transform`` and ``_initialize_nmf``) and NNDSVD-based
    initializations requested with more components than the data allows.
    """
    A = np.ones((2, 2))

    # ``match`` is applied as a regular expression; escape the message so the
    # "." in "1.0" is matched literally, like the messages built in the loop
    # below.
    msg = re.escape(
        "Invalid beta_loss parameter: solver 'cd' does not handle beta_loss = 1.0"
    )
    with pytest.raises(ValueError, match=msg):
        NMF(solver="cd", beta_loss=1.0).fit(A)

    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        NMF().fit(-A)
    clf = NMF(2, tol=0.1).fit(A)
    with pytest.raises(ValueError, match=msg):
        clf.transform(-A)
    with pytest.raises(ValueError, match=msg):
        nmf._initialize_nmf(-A, 2, "nndsvd")

    # A is 2x2, so asking for 3 components must be rejected by every
    # NNDSVD-based initialization.
    for init in ["nndsvd", "nndsvda", "nndsvdar"]:
        msg = re.escape(
            "init = '{}' can only be used when "
            "n_components <= min(n_samples, n_features)".format(init)
        )
        with pytest.raises(ValueError, match=msg):
            NMF(3, init=init).fit(A)
        with pytest.raises(ValueError, match=msg):
            MiniBatchNMF(3, init=init).fit(A)
        with pytest.raises(ValueError, match=msg):
            nmf._initialize_nmf(A, 3, init)
78
+
79
+
80
def test_initialize_close():
    """Check the reconstruction error of the NNDSVD initialization.

    The norm of ``W.H - A`` must not exceed the norm of ``A`` centered on its
    mean.
    """
    random_gen = np.random.mtrand.RandomState(42)
    A = np.abs(random_gen.randn(10, 10))
    W, H = nmf._initialize_nmf(A, 10, init="nndsvd")
    residual_norm = linalg.norm(np.dot(W, H) - A)
    spread_norm = linalg.norm(A - A.mean())
    assert residual_norm <= spread_norm
90
+
91
+
92
def test_initialize_variants():
    """Check that 'nndsvda'/'nndsvdar' only differ from 'nndsvd' at its zeros."""
    random_gen = np.random.mtrand.RandomState(42)
    data = np.abs(random_gen.randn(10, 10))
    W_base, H_base = nmf._initialize_nmf(data, 10, init="nndsvd")
    W_avg, H_avg = nmf._initialize_nmf(data, 10, init="nndsvda")
    W_rand, H_rand = nmf._initialize_nmf(data, 10, init="nndsvdar", random_state=0)

    pairs = ((W_base, W_avg), (W_base, W_rand), (H_base, H_avg), (H_base, H_rand))
    for reference, variant in pairs:
        # Only compare where the basic initialization is non-zero.
        nonzero = reference != 0
        assert_almost_equal(variant[nonzero], reference[nonzero])
104
+
105
+
106
# ignore UserWarning raised when both solver='mu' and init='nndsvd'
@pytest.mark.filterwarnings(
    r"ignore:The multiplicative update \('mu'\) solver cannot update zeros present in"
    r" the initialization"
)
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
@pytest.mark.parametrize("init", (None, "nndsvd", "nndsvda", "nndsvdar", "random"))
@pytest.mark.parametrize("alpha_W", (0.0, 1.0))
@pytest.mark.parametrize("alpha_H", (0.0, 1.0, "same"))
def test_nmf_fit_nn_output(Estimator, solver, init, alpha_W, alpha_H):
    """The fitted components and the transformed data contain no negatives."""
    A = np.c_[5.0 - np.arange(1, 6), 5.0 + np.arange(1, 6)]
    estimator = Estimator(
        n_components=2,
        init=init,
        alpha_W=alpha_W,
        alpha_H=alpha_H,
        random_state=0,
        **solver,
    )
    transformed = estimator.fit_transform(A)
    assert not (estimator.components_ < 0).any()
    assert not (transformed < 0).any()
131
+
132
+
133
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
def test_nmf_fit_close(Estimator, solver):
    """The reconstruction error after fitting must stay below 0.1."""
    random_gen = np.random.mtrand.RandomState(42)
    X = np.abs(random_gen.randn(6, 5))
    estimator = Estimator(
        5, init="nndsvdar", random_state=0, max_iter=600, **solver
    )
    fitted = estimator.fit(X)
    assert fitted.reconstruction_err_ < 0.1
149
+
150
+
151
def test_nmf_true_reconstruction():
    """Fit data that has an exact factorization by construction.

    ``X`` is built as ``W_true.H_true``; both NMF (solver 'mu') and
    MiniBatchNMF must reach a reconstruction error below 0.1 and rebuild ``X``
    within the tolerances used below.
    """
    n_samples, n_features, n_components = 15, 10, 5
    beta_loss = 1
    batch_size = 3
    max_iter = 1000

    random_gen = np.random.mtrand.RandomState(42)

    # One non-zero entry per column of W_true.
    W_true = np.zeros([n_samples, n_components])
    W_values = np.abs(random_gen.randn(n_samples))
    w_cols = np.arange(n_components)
    w_rows = w_cols % n_samples
    W_true[w_rows, w_cols] = W_values[w_rows]

    # One non-zero entry per column of H_true.
    H_true = np.zeros([n_components, n_features])
    H_values = np.abs(random_gen.randn(n_components))
    h_cols = np.arange(n_features)
    h_rows = h_cols % n_components
    H_true[h_rows, h_cols] = H_values[h_rows]

    X = np.dot(W_true, H_true)

    batch_model = NMF(
        n_components=n_components,
        solver="mu",
        beta_loss=beta_loss,
        max_iter=max_iter,
        random_state=0,
    )
    W_fit = batch_model.fit_transform(X)
    X_rebuilt = np.dot(W_fit, batch_model.components_)

    assert batch_model.reconstruction_err_ < 0.1
    assert_allclose(X, X_rebuilt)

    minibatch_model = MiniBatchNMF(
        n_components=n_components,
        beta_loss=beta_loss,
        batch_size=batch_size,
        random_state=0,
        max_iter=max_iter,
    )
    W_fit = minibatch_model.fit_transform(X)
    X_rebuilt = np.dot(W_fit, minibatch_model.components_)

    assert minibatch_model.reconstruction_err_ < 0.1
    assert_allclose(X, X_rebuilt, atol=1)
197
+
198
+
199
@pytest.mark.parametrize("solver", ["cd", "mu"])
def test_nmf_transform(solver):
    """``fit_transform`` and ``fit`` followed by ``transform`` agree for NMF."""
    random_gen = np.random.mtrand.RandomState(42)
    A = np.abs(random_gen.randn(6, 5))
    model = NMF(
        solver=solver, n_components=3, init="random", random_state=0, tol=1e-6
    )
    from_fit_transform = model.fit_transform(A)
    from_transform = model.transform(A)
    assert_allclose(from_fit_transform, from_transform, atol=1e-1)
215
+
216
+
217
def test_minibatch_nmf_transform():
    """``fit_transform`` and ``transform`` agree for MiniBatchNMF.

    The estimator is configured with ``fresh_restarts=True``.
    """
    random_gen = np.random.mtrand.RandomState(42)
    A = np.abs(random_gen.randn(6, 5))
    model = MiniBatchNMF(
        n_components=3, random_state=0, tol=1e-3, fresh_restarts=True
    )
    from_fit_transform = model.fit_transform(A)
    from_transform = model.transform(A)
    assert_allclose(from_fit_transform, from_transform)
231
+
232
+
233
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
def test_nmf_transform_custom_init(Estimator, solver):
    """Smoke test: ``transform`` works after fitting with a custom init."""
    random_gen = np.random.RandomState(0)
    A = np.abs(random_gen.randn(6, 5))
    n_components = 4
    scale = np.sqrt(A.mean() / n_components)
    # H is drawn before W so the random stream is consumed in a fixed order.
    H_init = np.abs(scale * random_gen.randn(n_components, 5))
    W_init = np.abs(scale * random_gen.randn(6, n_components))

    model = Estimator(
        n_components=n_components,
        init="custom",
        random_state=0,
        tol=1e-3,
        **solver,
    )
    model.fit_transform(A, W=W_init, H=H_init)
    model.transform(A)
251
+
252
+
253
@pytest.mark.parametrize("solver", ("cd", "mu"))
def test_nmf_inverse_transform(solver):
    """``inverse_transform`` of the fitted codes is close to the input."""
    random_gen = np.random.RandomState(0)
    A = np.abs(random_gen.randn(6, 4))
    model = NMF(
        solver=solver,
        n_components=4,
        init="random",
        random_state=0,
        max_iter=1000,
    )
    codes = model.fit_transform(A)
    A_rebuilt = model.inverse_transform(codes)
    assert_array_almost_equal(A, A_rebuilt, decimal=2)
268
+
269
+
270
def test_mbnmf_inverse_transform():
    """MiniBatchNMF ``fit_transform`` then ``inverse_transform`` ~ identity."""
    rng = np.random.RandomState(0)
    A = np.abs(rng.randn(6, 4))
    # The same RandomState instance that produced A seeds the estimator.
    model = MiniBatchNMF(
        random_state=rng,
        max_iter=500,
        init="nndsvdar",
        fresh_restarts=True,
    )
    codes = model.fit_transform(A)
    A_rebuilt = model.inverse_transform(codes)
    assert_allclose(A, A_rebuilt, rtol=1e-3, atol=1e-2)
284
+
285
+
286
@pytest.mark.parametrize("Estimator", [NMF, MiniBatchNMF])
def test_n_components_greater_n_features(Estimator):
    """Smoke test: fitting works with more components (15) than features (10)."""
    random_gen = np.random.mtrand.RandomState(42)
    A = np.abs(random_gen.randn(30, 10))
    estimator = Estimator(n_components=15, random_state=0, tol=1e-2)
    estimator.fit(A)
292
+
293
+
294
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
@pytest.mark.parametrize("sparse_container", CSC_CONTAINERS + CSR_CONTAINERS)
@pytest.mark.parametrize("alpha_W", (0.0, 1.0))
@pytest.mark.parametrize("alpha_H", (0.0, 1.0, "same"))
def test_nmf_sparse_input(Estimator, solver, sparse_container, alpha_W, alpha_H):
    """Dense and sparse versions of the same data give the same W and H."""
    random_gen = np.random.mtrand.RandomState(42)
    A = np.abs(random_gen.randn(10, 10))
    # Zero out the even columns (0, 2, 4, 6, 8).
    A[:, ::2] = 0
    A_sparse = sparse_container(A)

    dense_est = Estimator(
        n_components=5,
        init="random",
        alpha_W=alpha_W,
        alpha_H=alpha_H,
        random_state=0,
        tol=0,
        max_iter=100,
        **solver,
    )
    sparse_est = clone(dense_est)

    W_dense = dense_est.fit_transform(A)
    W_sparse = sparse_est.fit_transform(A_sparse)

    assert_allclose(W_dense, W_sparse)
    assert_allclose(dense_est.components_, sparse_est.components_)
327
+
328
+
329
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_nmf_sparse_transform(Estimator, solver, csc_container):
    """``transform`` works on sparse data (issue #2124)."""
    random_gen = np.random.mtrand.RandomState(42)
    dense = np.abs(random_gen.randn(3, 2))
    dense[1, 1] = 0
    A = csc_container(dense)

    estimator = Estimator(random_state=0, n_components=2, max_iter=400, **solver)
    from_fit_transform = estimator.fit_transform(A)
    from_transform = estimator.transform(A)
    assert_allclose(from_fit_transform, from_transform, atol=1e-1)
345
+
346
+
347
@pytest.mark.parametrize("init", ["random", "nndsvd"])
@pytest.mark.parametrize("solver", ("cd", "mu"))
@pytest.mark.parametrize("alpha_W", (0.0, 1.0))
@pytest.mark.parametrize("alpha_H", (0.0, 1.0, "same"))
def test_non_negative_factorization_consistency(init, solver, alpha_W, alpha_H):
    """The function and the NMF class give the same results for equal settings."""
    random_gen = np.random.mtrand.RandomState(42)
    A = np.abs(random_gen.randn(10, 10))
    # Zero out the even columns (0, 2, 4, 6, 8).
    A[:, ::2] = 0

    # Settings shared by the function calls and the estimator.
    shared = dict(
        init=init,
        solver=solver,
        max_iter=500,
        alpha_W=alpha_W,
        alpha_H=alpha_H,
        random_state=1,
        tol=1e-2,
    )

    W_func, H, _ = non_negative_factorization(A, **shared)
    # Second call keeps H fixed; this corresponds to ``transform`` below.
    W_func_fixed_H, H, _ = non_negative_factorization(
        A, H=H, update_H=False, **shared
    )

    estimator = NMF(**shared)
    W_est = estimator.fit_transform(A)
    W_est_fixed_H = estimator.transform(A)

    assert_allclose(W_func, W_est)
    assert_allclose(W_func_fixed_H, W_est_fixed_H)
396
+
397
+
398
def test_non_negative_factorization_checking():
    """Check W/H validation in the public ``non_negative_factorization``.

    Parameter types and value ranges are already handled by the common tests;
    only problems that simple declarative constraints cannot express are
    checked here.
    """
    A = np.ones((2, 2))

    # (W, H, expected error message) for each invalid custom initialization.
    invalid_inits = (
        (A, -A, "Negative values in data passed to NMF (input H)"),
        (-A, A, "Negative values in data passed to NMF (input W)"),
        (A, 0 * A, "Array passed to NMF (input H) is full of zeros"),
    )
    for W_init, H_init, message in invalid_inits:
        with pytest.raises(ValueError, match=re.escape(message)):
            non_negative_factorization(A, W_init, H_init, 2, init="custom")
416
+
417
+
418
def _beta_divergence_dense(X, W, H, beta):
    """Reference beta-divergence between X and W.H, for dense arrays only.

    Serves as the ground truth when testing ``nmf._beta_divergence``.
    """
    product = np.dot(W, H)

    if beta == 2:
        return squared_norm(X - product) / 2

    # Restrict to the non-zero entries of X and floor the matching entries of
    # W.H at 1e-9 before dividing or taking logs.
    nonzero_mask = X != 0
    X_nz = X[nonzero_mask]
    product_nz = np.maximum(product[nonzero_mask], 1e-9)

    if beta == 1:
        divergence = np.sum(X_nz * np.log(X_nz / product_nz))
        divergence += product.sum() - X.sum()
        return divergence

    if beta == 0:
        ratio = X_nz / product_nz
        return np.sum(ratio) - X.size - np.sum(np.log(ratio))

    divergence = (X_nz**beta).sum()
    divergence += (beta - 1) * (product**beta).sum()
    divergence -= beta * (X_nz * (product_nz ** (beta - 1))).sum()
    divergence /= beta * (beta - 1)
    return divergence
446
+
447
+
448
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_beta_divergence(csr_container):
    """``nmf._beta_divergence`` matches the dense reference implementation."""
    n_samples, n_features, n_components = 20, 10, 5

    # Non-negative data with exact zeros, in dense and CSR form.
    random_gen = np.random.mtrand.RandomState(42)
    X = random_gen.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    X_csr = csr_container(X)
    W, H = nmf._initialize_nmf(X, n_components, init="random", random_state=42)

    for beta in (0.0, 0.5, 1.0, 1.5, 2.0, 3.0):
        expected = _beta_divergence_dense(X, W, H, beta)
        dense_loss = nmf._beta_divergence(X, W, H, beta)
        sparse_loss = nmf._beta_divergence(X_csr, W, H, beta)

        assert_almost_equal(expected, dense_loss, decimal=7)
        assert_almost_equal(expected, sparse_loss, decimal=7)
470
+
471
+
472
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_special_sparse_dot(csr_container):
    """Check ``_special_sparse_dot``: np.dot(W, H) only where X is non-zero."""
    n_samples, n_features, n_components = 10, 5, 3
    random_gen = np.random.mtrand.RandomState(42)
    X = random_gen.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    X_csr = csr_container(X)

    W = np.abs(random_gen.randn(n_samples, n_components))
    H = np.abs(random_gen.randn(n_components, n_features))

    sparse_result = nmf._special_sparse_dot(W, H, X_csr)
    dense_result = nmf._special_sparse_dot(W, H, X)

    # Values must agree on the non-zero positions of X_csr.
    rows, cols = X_csr.nonzero()
    sparse_values = np.asarray(sparse_result[rows, cols]).ravel()
    assert_array_almost_equal(sparse_values, dense_result[rows, cols], decimal=10)

    # The sparse result must share the sparsity structure of X_csr.
    assert_array_equal(sparse_result.indices, X_csr.indices)
    assert_array_equal(sparse_result.indptr, X_csr.indptr)
    assert_array_equal(sparse_result.shape, X_csr.shape)
498
+
499
+
500
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_nmf_multiplicative_update_sparse(csr_container):
    """Multiplicative-update NMF agrees between dense and sparse input.

    Also checks that the result varies continuously with ``beta_loss``.
    """
    n_samples, n_features, n_components = 20, 10, 5
    alpha = 0.1
    l1_ratio = 0.5
    n_iter = 20

    random_gen = np.random.mtrand.RandomState(1337)
    X = np.abs(random_gen.randn(n_samples, n_features))
    X_csr = csr_container(X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init="random", random_state=42)

    def _factorize(data, beta):
        # Every run starts from untouched copies of the shared initialization.
        W_out, H_out, _ = non_negative_factorization(
            data,
            W0.copy(),
            H0.copy(),
            n_components,
            init="custom",
            update_H=True,
            solver="mu",
            beta_loss=beta,
            max_iter=n_iter,
            alpha_W=alpha,
            l1_ratio=l1_ratio,
            random_state=42,
        )
        return W_out, H_out

    for beta_loss in (-1.2, 0, 0.2, 1.0, 2.0, 2.5):
        # Reference with the dense array.
        W_dense, H_dense = _factorize(X, beta_loss)

        # Same problem with the sparse container.
        W_sparse, H_sparse = _factorize(X_csr, beta_loss)
        assert_allclose(W_dense, W_sparse, atol=1e-7)
        assert_allclose(H_dense, H_sparse, atol=1e-7)

        # Some beta_loss values take a dedicated code path, but the results
        # should still be continuous w.r.t. beta_loss.
        W_shifted, H_shifted = _factorize(X_csr, beta_loss - 1.0e-5)
        assert_allclose(W_dense, W_shifted, atol=1e-4)
        assert_allclose(H_dense, H_shifted, atol=1e-4)
578
+
579
+
580
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_nmf_negative_beta_loss(csr_container):
    """Check beta_loss <= 0 handling and absence of NaN in the output.

    An error must be raised when ``beta_loss <= 0`` and X contains zeros; for
    every accepted configuration the returned W and H must be free of NaN.
    """
    n_samples, n_features, n_components = 6, 5, 3

    random_gen = np.random.mtrand.RandomState(42)
    X = random_gen.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    X_csr = csr_container(X)

    def _check_no_nan(data, beta_loss):
        W, H, _ = non_negative_factorization(
            data,
            init="random",
            n_components=n_components,
            solver="mu",
            beta_loss=beta_loss,
            random_state=0,
            max_iter=1000,
        )
        assert not np.isnan(W).any()
        assert not np.isnan(H).any()

    msg = "When beta_loss <= 0 and X contains zeros, the solver may diverge."
    for beta_loss in (-0.6, 0.0):
        with pytest.raises(ValueError, match=msg):
            _check_no_nan(X, beta_loss)
        # Shifting the data by 1e-9 removes the exact zeros.
        _check_no_nan(X + 1e-9, beta_loss)

    for beta_loss in (0.2, 1.0, 1.2, 2.0, 2.5):
        _check_no_nan(X, beta_loss)
        _check_no_nan(X_csr, beta_loss)
615
+
616
+
617
@pytest.mark.parametrize("beta_loss", [-0.5, 0.0])
def test_minibatch_nmf_negative_beta_loss(beta_loss):
    """MiniBatchNMF raises when beta_loss <= 0 and X contains zeros."""
    random_gen = np.random.RandomState(0)
    X = random_gen.normal(size=(6, 5))
    X[X < 0] = 0

    estimator = MiniBatchNMF(beta_loss=beta_loss, random_state=0)

    expected_message = (
        "When beta_loss <= 0 and X contains zeros, the solver may diverge."
    )
    with pytest.raises(ValueError, match=expected_message):
        estimator.fit(X)
629
+
630
+
631
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
def test_nmf_regularization(Estimator, solver):
    """Check the effect of L1 and L2 regularization on W and H."""
    n_samples, n_features, n_components = 6, 5, 3
    random_gen = np.random.mtrand.RandomState(42)
    X = np.abs(random_gen.randn(n_samples, n_features))

    def _fit_pair(l1_ratio):
        # Fit a regularized (alpha_W=0.5) and an unregularized (alpha_W=0.0)
        # estimator that are otherwise identical.
        regularized = Estimator(
            n_components=n_components,
            alpha_W=0.5,
            l1_ratio=l1_ratio,
            random_state=42,
            **solver,
        )
        plain = Estimator(
            n_components=n_components,
            alpha_W=0.0,
            l1_ratio=l1_ratio,
            random_state=42,
            **solver,
        )
        W_reg = regularized.fit_transform(X)
        W_plain = plain.fit_transform(X)
        return W_reg, regularized.components_, W_plain, plain.components_

    # L1 regularization should increase the number of zeros.
    W_regul, H_regul, W_model, H_model = _fit_pair(l1_ratio=1.0)

    eps = np.finfo(np.float64).eps
    assert np.count_nonzero(W_regul <= eps) > np.count_nonzero(W_model <= eps)
    assert np.count_nonzero(H_regul <= eps) > np.count_nonzero(H_model <= eps)

    # L2 regularization should decrease the sum of the squared norms of W
    # and H.
    W_regul, H_regul, W_model, H_model = _fit_pair(l1_ratio=0.0)

    plain_energy = (linalg.norm(W_model)) ** 2.0 + (linalg.norm(H_model)) ** 2.0
    regul_energy = (linalg.norm(W_regul)) ** 2.0 + (linalg.norm(H_regul)) ** 2.0
    assert plain_energy > regul_energy
702
+
703
+
704
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
@pytest.mark.parametrize("solver", ("cd", "mu"))
def test_nmf_decreasing(solver):
    """The regularized objective strictly decreases at every iteration."""
    n_samples, n_features, n_components = 20, 15, 10
    alpha = 0.1
    l1_ratio = 0.5
    tol = 0.0

    random_gen = np.random.mtrand.RandomState(42)
    X = random_gen.randn(n_samples, n_features)
    np.abs(X, out=X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init="random", random_state=42)

    l1_scale = alpha * l1_ratio
    l2_scale = alpha * (1 - l1_ratio)

    for beta_loss in (-1.2, 0, 0.2, 1.0, 2.0, 2.5):
        if solver != "mu" and beta_loss != 2:
            # not implemented
            continue

        W, H = W0.copy(), H0.copy()
        previous_loss = None
        for _ in range(30):
            # Run exactly one more iteration from the previous W and H.
            W, H, _ = non_negative_factorization(
                X,
                W,
                H,
                beta_loss=beta_loss,
                init="custom",
                n_components=n_components,
                max_iter=1,
                alpha_W=alpha,
                solver=solver,
                tol=tol,
                l1_ratio=l1_ratio,
                verbose=0,
                random_state=0,
                update_H=True,
            )

            # Data-fit term plus the L1 and L2 penalties on W and H.
            loss = nmf._beta_divergence(X, W, H, beta_loss)
            loss += l1_scale * n_features * W.sum()
            loss += l1_scale * n_samples * H.sum()
            loss += l2_scale * n_features * (W**2).sum()
            loss += l2_scale * n_samples * (H**2).sum()

            if previous_loss is not None:
                assert previous_loss > loss
            previous_loss = loss
756
+
757
+
758
def test_nmf_underflow():
    """Regression test for an underflow issue in ``_beta_divergence``.

    An entry of 1e-323 in X must give almost the same divergence as an exact
    zero.
    """
    random_gen = np.random.RandomState(0)
    n_samples, n_features, n_components = 10, 2, 2
    X = np.abs(random_gen.randn(n_samples, n_features)) * 10
    W = np.abs(random_gen.randn(n_samples, n_components)) * 10
    H = np.abs(random_gen.randn(n_components, n_features))

    X[0, 0] = 0
    with_zero = nmf._beta_divergence(X, W, H, beta=1.0)
    X[0, 0] = 1e-323
    with_tiny = nmf._beta_divergence(X, W, H, beta=1.0)
    assert_almost_equal(with_tiny, with_zero)
771
+
772
+
773
@pytest.mark.parametrize(
    "dtype_in, dtype_out",
    [
        (np.float32, np.float32),
        (np.float64, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
    ],
)
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
def test_nmf_dtype_match(Estimator, solver, dtype_in, dtype_out):
    """Output dtype follows the input dtype as listed in the parametrization."""
    X = np.random.RandomState(0).randn(20, 15).astype(dtype_in, copy=False)
    np.abs(X, out=X)

    estimator = Estimator(
        alpha_W=1.0, alpha_H=1.0, tol=1e-2, random_state=0, **solver
    )

    transformed = estimator.fit(X).transform(X)
    assert transformed.dtype == dtype_out
    fit_transformed = estimator.fit_transform(X)
    assert fit_transformed.dtype == dtype_out
    assert estimator.components_.dtype == dtype_out
802
+
803
+
804
@pytest.mark.parametrize(
    ("Estimator", "solver"),
    [(NMF, {"solver": "cd"}), (NMF, {"solver": "mu"}), (MiniBatchNMF, {})],
)
def test_nmf_float32_float64_consistency(Estimator, solver):
    """float32 and float64 inputs give the same W up to ``atol=1e-5``."""
    X = np.random.RandomState(0).randn(50, 7)
    np.abs(X, out=X)

    model_32 = Estimator(random_state=0, tol=1e-3, **solver)
    W_32 = model_32.fit_transform(X.astype(np.float32))
    model_64 = Estimator(random_state=0, tol=1e-3, **solver)
    W_64 = model_64.fit_transform(X)

    assert_allclose(W_32, W_64, atol=1e-5)
818
+
819
+
820
@pytest.mark.parametrize("Estimator", [NMF, MiniBatchNMF])
def test_nmf_custom_init_dtype_error(Estimator):
    """An error is raised if custom H and/or W do not share the dtype of X."""
    random_gen = np.random.RandomState(0)
    X = random_gen.random_sample((20, 15))
    # H is float32 while X and W keep the default float64.
    H = random_gen.random_sample((15, 15)).astype(np.float32)
    W = random_gen.random_sample((20, 15))

    expected_message = "should have the same dtype as X"

    with pytest.raises(TypeError, match=expected_message):
        Estimator(init="custom").fit(X, H=H, W=W)

    with pytest.raises(TypeError, match=expected_message):
        non_negative_factorization(X, H=H, update_H=False)
834
+
835
+
836
@pytest.mark.parametrize("beta_loss", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5])
def test_nmf_minibatchnmf_equivalence(beta_loss):
    """MiniBatchNMF matches NMF for a single full batch.

    Uses ``batch_size = n_samples`` and ``forget_factor=0.0`` (stopping
    criterion put aside).
    """
    random_gen = np.random.mtrand.RandomState(42)
    X = np.abs(random_gen.randn(48, 5))

    batch_model = NMF(
        n_components=5,
        beta_loss=beta_loss,
        solver="mu",
        random_state=0,
        tol=0,
    )
    minibatch_model = MiniBatchNMF(
        n_components=5,
        beta_loss=beta_loss,
        random_state=0,
        tol=0,
        max_no_improvement=None,
        batch_size=X.shape[0],
        forget_factor=0.0,
    )
    W_batch = batch_model.fit_transform(X)
    W_minibatch = minibatch_model.fit_transform(X)
    assert_allclose(W_batch, W_minibatch)
862
+
863
+
864
def test_minibatch_nmf_partial_fit():
    """``fit`` and repeated ``partial_fit`` give the same steps and components.

    NOTE(review): the original comment said this equivalence is "applicable
    only with fresh restarts", yet the ``fit`` estimator below is built with
    ``fresh_restarts=False`` -- confirm which is intended.
    """
    random_gen = np.random.mtrand.RandomState(42)
    X = np.abs(random_gen.randn(100, 5))

    n_components = 5
    batch_size = 10
    max_iter = 2

    fit_model = MiniBatchNMF(
        n_components=n_components,
        init="custom",
        random_state=0,
        max_iter=max_iter,
        batch_size=batch_size,
        tol=0,
        max_no_improvement=None,
        fresh_restarts=False,
    )
    partial_model = MiniBatchNMF(
        n_components=n_components, init="custom", random_state=0
    )

    # Force the same init of H (W is recomputed anyway) to be able to compare
    # results.
    W, H = nmf._initialize_nmf(
        X, n_components=n_components, init="random", random_state=0
    )

    fit_model.fit(X, W=W, H=H)
    for _ in range(max_iter):
        # NOTE(review): the slices below are overlapping windows starting at
        # rows 0..batch_size-1, not consecutive batches -- confirm intent.
        for start in range(batch_size):
            partial_model.partial_fit(
                X[start : start + batch_size], W=W[:batch_size], H=H
            )

    assert fit_model.n_steps_ == partial_model.n_steps_
    assert_allclose(fit_model.components_, partial_model.components_)
897
+
898
+
899
def test_feature_names_out():
    """``get_feature_names_out`` returns ``nmf0``..``nmf2`` for 3 components."""
    random_gen = np.random.RandomState(0)
    X = np.abs(random_gen.randn(10, 4))
    fitted = NMF(n_components=3).fit(X)

    expected_names = ["nmf0", "nmf1", "nmf2"]
    assert_array_equal(expected_names, fitted.get_feature_names_out())
907
+
908
+
909
def test_minibatch_nmf_verbose():
    """Run MiniBatchNMF in verbose mode for better coverage."""
    A = np.random.RandomState(0).random_sample((100, 10))
    estimator = MiniBatchNMF(tol=1e-2, random_state=0, verbose=1)

    # Swap stdout for an in-memory buffer while fitting and always restore it.
    saved_stdout, sys.stdout = sys.stdout, StringIO()
    try:
        estimator.fit(A)
    finally:
        sys.stdout = saved_stdout
919
+
920
+
921
@pytest.mark.parametrize("Estimator", [NMF, MiniBatchNMF])
def test_nmf_n_components_auto(Estimator):
    """``n_components="auto"`` is inferred from the custom initialization."""
    random_gen = np.random.RandomState(0)
    X = random_gen.random_sample((6, 5))
    W = random_gen.random_sample((6, 2))
    H = random_gen.random_sample((2, 5))
    estimator = Estimator(
        n_components="auto", init="custom", random_state=0, tol=1e-6
    )
    estimator.fit_transform(X, W=W, H=H)
    assert estimator._n_components == H.shape[0]
937
+
938
+
939
def test_nmf_non_negative_factorization_n_components_auto():
    """The function infers ``n_components`` from the custom W and H shapes."""
    random_gen = np.random.RandomState(0)
    X = random_gen.random_sample((6, 5))
    W_init = random_gen.random_sample((6, 2))
    H_init = random_gen.random_sample((2, 5))

    W, H, _ = non_negative_factorization(
        X, W=W_init, H=H_init, init="custom", n_components="auto"
    )

    assert H.shape == H_init.shape
    assert W.shape == W_init.shape
951
+
952
+
953
def test_nmf_n_components_auto_no_h_update():
    """``n_components="auto"`` works with ``update_H=False``.

    The call must not fail, H must be returned unchanged and W must have one
    column per row of H.
    """
    random_gen = np.random.RandomState(0)
    X = random_gen.random_sample((6, 5))
    H_true = random_gen.random_sample((2, 5))

    # should not fail
    W, H, _ = non_negative_factorization(
        X, H=H_true, n_components="auto", update_H=False
    )

    assert_allclose(H, H_true)
    expected_shape = (X.shape[0], H_true.shape[0])
    assert W.shape == expected_shape
965
+
966
+
967
def test_nmf_w_h_not_used_warning():
    """Warn when user-provided W or H are ignored by the initialization."""
    random_gen = np.random.RandomState(0)
    X = random_gen.random_sample((6, 5))
    W_init = random_gen.random_sample((6, 2))
    H_init = random_gen.random_sample((2, 5))

    ignored_message = "When init!='custom', provided W or H are ignored"

    with pytest.warns(RuntimeWarning, match=ignored_message):
        non_negative_factorization(X, H=H_init, update_H=True, n_components="auto")

    with pytest.warns(RuntimeWarning, match=ignored_message):
        non_negative_factorization(
            X, W=W_init, H=H_init, update_H=True, n_components="auto"
        )

    # When update_H is False, W is ignored regardless of init
    # TODO: use the provided W when init="custom".
    with pytest.warns(
        RuntimeWarning, match="When update_H=False, the provided initial W is not used."
    ):
        non_negative_factorization(
            X, W=W_init, H=H_init, update_H=False, n_components="auto"
        )
996
+
997
+
998
def test_nmf_custom_init_shape_error():
    """A custom W with the wrong shape raises an informative error."""
    random_gen = np.random.RandomState(0)
    X = random_gen.random_sample((6, 5))
    H = random_gen.random_sample((2, 5))
    estimator = NMF(n_components=2, init="custom", random_state=0)

    # W with 5 rows while X has 6 samples.
    with pytest.raises(ValueError, match="Array with wrong first dimension passed"):
        estimator.fit(X, H=H, W=random_gen.random_sample((5, 2)))

    # W with 3 columns while n_components is 2.
    with pytest.raises(ValueError, match="Array with wrong second dimension passed"):
        estimator.fit(X, H=H, W=random_gen.random_sample((6, 3)))