Prompt48 committed on
Commit
f786f53
·
verified ·
1 Parent(s): 246f759

Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\sklearn\ensemble\_hist_gradient_boosting\tests\test_histogram.py with huggingface_hub

Browse files
edit//Qwen3-TTS-test//.venv//Lib//site-packages//sklearn//ensemble//_hist_gradient_boosting//tests//test_histogram.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+ from numpy.testing import assert_allclose, assert_array_equal
4
+
5
+ from sklearn.ensemble._hist_gradient_boosting.common import (
6
+ G_H_DTYPE,
7
+ HISTOGRAM_DTYPE,
8
+ X_BINNED_DTYPE,
9
+ )
10
+ from sklearn.ensemble._hist_gradient_boosting.histogram import (
11
+ _build_histogram,
12
+ _build_histogram_naive,
13
+ _build_histogram_no_hessian,
14
+ _build_histogram_root,
15
+ _build_histogram_root_no_hessian,
16
+ _subtract_histograms,
17
+ )
18
+
19
+
20
@pytest.mark.parametrize("build_func", [_build_histogram_naive, _build_histogram])
def test_build_histogram(build_func):
    """Compare a histogram builder against hand-computed bin statistics.

    Two hard-coded scenarios are run on the same 8-sample binned feature. For
    each of them, the per-bin count, gradient sum and hessian sum written by
    ``build_func`` into a 3-bin histogram must match the expected values.
    """
    binned_feature = np.array([0, 2, 0, 1, 2, 0, 2, 1], dtype=X_BINNED_DTYPE)

    # One tuple per scenario:
    # (sample indices, gradients, hessians, counts, gradient sums, hessian sums)
    scenarios = (
        # Small sample_indices (below unrolling threshold)
        ([0, 2, 3], [0, 1, 3], [1, 1, 2], [2, 1, 0], [1, 3, 0], [2, 2, 0]),
        # Larger sample_indices (above unrolling threshold)
        (
            [0, 2, 3, 6, 7],
            [0, 1, 3, 0, 1],
            [1, 1, 2, 1, 0],
            [2, 2, 1],
            [1, 4, 0],
            [2, 2, 1],
        ),
    )

    for indices, gradients, hessians, counts, grad_sums, hess_sums in scenarios:
        histograms = np.zeros((1, 3), dtype=HISTOGRAM_DTYPE)
        build_func(
            0,
            np.array(indices, dtype=np.uint32),
            binned_feature,
            np.array(gradients, dtype=G_H_DTYPE),
            np.array(hessians, dtype=G_H_DTYPE),
            histograms,
        )

        # Only feature 0 exists; inspect its row of the structured array.
        feature_hist = histograms[0]
        assert_array_equal(feature_hist["count"], counts)
        assert_allclose(feature_hist["sum_gradients"], grad_sums)
        assert_allclose(feature_hist["sum_hessians"], hess_sums)
51
+
52
+
53
def test_histogram_sample_order_independence():
    """Permuting the samples must leave the resulting histograms unchanged.

    Histograms are built once with the samples in their drawn order and once
    after applying the same random permutation to the sample indices and to
    their gradients (and hessians). Both the gradient-only builder and the
    gradient-and-hessian builder are checked.
    """
    n_samples, n_sub_samples, n_bins = 1000, 100, 256
    rng = np.random.RandomState(42)

    # All random data is drawn up front. The sequence of draws from ``rng``
    # fixes the generated values, so these lines must not be reordered.
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=X_BINNED_DTYPE)
    all_indices = np.arange(n_samples, dtype=np.uint32)
    sample_indices = rng.choice(all_indices, n_sub_samples, replace=False)
    ordered_gradients = rng.randn(n_sub_samples).astype(G_H_DTYPE)
    ordered_hessians = rng.exponential(size=n_sub_samples).astype(G_H_DTYPE)
    permutation = rng.permutation(n_sub_samples)

    def gradient_only_hist(indices, gradients):
        # Histogram of feature 0 built without hessians.
        out = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
        _build_histogram_no_hessian(0, indices, binned_feature, gradients, out)
        return out[0]

    def full_hist(indices, gradients, hessians):
        # Histogram of feature 0 built with gradients and hessians.
        out = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
        _build_histogram(0, indices, binned_feature, gradients, hessians, out)
        return out[0]

    hist_gc = gradient_only_hist(sample_indices, ordered_gradients)
    hist_ghc = full_hist(sample_indices, ordered_gradients, ordered_hessians)
    hist_gc_perm = gradient_only_hist(
        sample_indices[permutation], ordered_gradients[permutation]
    )
    hist_ghc_perm = full_hist(
        sample_indices[permutation],
        ordered_gradients[permutation],
        ordered_hessians[permutation],
    )

    assert_allclose(hist_gc["sum_gradients"], hist_gc_perm["sum_gradients"])
    assert_array_equal(hist_gc["count"], hist_gc_perm["count"])

    assert_allclose(hist_ghc["sum_gradients"], hist_ghc_perm["sum_gradients"])
    assert_allclose(hist_ghc["sum_hessians"], hist_ghc_perm["sum_hessians"])
    assert_array_equal(hist_ghc["count"], hist_ghc_perm["count"])
108
+
109
+
110
@pytest.mark.parametrize("constant_hessian", [True, False])
def test_unrolled_equivalent_to_naive(constant_hessian):
    """Every specialised histogram builder must agree with the naive one.

    The root / non-root and hessian / no-hessian builders are run on the same
    10 samples (the sample indices cover all of them) and compared with
    ``_build_histogram_naive``. The no-hessian variants are additionally
    expected to leave ``sum_hessians`` at zero.
    """
    n_samples, n_bins = 10, 5
    rng = np.random.RandomState(42)

    sample_indices = np.arange(n_samples).astype(np.uint32)
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=np.uint8)
    ordered_gradients = rng.randn(n_samples).astype(G_H_DTYPE)
    ordered_hessians = (
        np.ones(n_samples, dtype=G_H_DTYPE)
        if constant_hessian
        else rng.lognormal(size=n_samples).astype(G_H_DTYPE)
    )

    # One zero-initialised (1, n_bins) output buffer per builder.
    hist_gc_root, hist_ghc_root, hist_gc, hist_ghc, hist_naive = (
        np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE) for _ in range(5)
    )

    _build_histogram_root_no_hessian(0, binned_feature, ordered_gradients, hist_gc_root)
    _build_histogram_root(
        0, binned_feature, ordered_gradients, ordered_hessians, hist_ghc_root
    )
    _build_histogram_no_hessian(
        0, sample_indices, binned_feature, ordered_gradients, hist_gc
    )
    _build_histogram(
        0, sample_indices, binned_feature, ordered_gradients, ordered_hessians, hist_ghc
    )
    _build_histogram_naive(
        0, sample_indices, binned_feature, ordered_gradients, ordered_hessians, hist_naive
    )

    # Keep only the row of feature 0 from each buffer.
    reference = hist_naive[0]
    without_hessians = (hist_gc_root[0], hist_gc[0])
    with_hessians = (hist_ghc_root[0], hist_ghc[0])

    ordered_for_checks = (
        without_hessians[0],
        with_hessians[0],
        without_hessians[1],
        with_hessians[1],
    )
    for candidate in ordered_for_checks:
        assert_array_equal(candidate["count"], reference["count"])
        assert_allclose(candidate["sum_gradients"], reference["sum_gradients"])
    for candidate in with_hessians:
        assert_allclose(candidate["sum_hessians"], reference["sum_hessians"])
    for candidate in without_hessians:
        assert_array_equal(candidate["sum_hessians"], np.zeros(n_bins))
162
+
163
+
164
@pytest.mark.parametrize("constant_hessian", [True, False])
def test_hist_subtraction(constant_hessian):
    """The subtraction trick must reproduce directly built child histograms.

    A parent histogram is built on all samples, the samples are split in two
    by a random mask, and each child histogram is built directly. Subtracting
    one child from a copy of the parent must then give the other child.
    """
    n_samples, n_bins = 10, 5
    rng = np.random.RandomState(42)

    sample_indices = np.arange(n_samples).astype(np.uint32)
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=np.uint8)
    ordered_gradients = rng.randn(n_samples).astype(G_H_DTYPE)
    if constant_hessian:
        ordered_hessians = np.ones(n_samples, dtype=G_H_DTYPE)
    else:
        ordered_hessians = rng.lognormal(size=n_samples).astype(G_H_DTYPE)

    def build(indices, gradients, hessians):
        # Fill a fresh (1, n_bins) histogram for the given samples, using the
        # builder that matches the hessian setting of this test run.
        out = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
        if constant_hessian:
            _build_histogram_no_hessian(0, indices, binned_feature, gradients, out)
        else:
            _build_histogram(0, indices, binned_feature, gradients, hessians, out)
        return out

    hist_parent = build(sample_indices, ordered_gradients, ordered_hessians)

    # Random left/right assignment of the samples.
    mask = rng.randint(0, 2, n_samples).astype(bool)
    hist_left = build(
        sample_indices[mask], ordered_gradients[mask], ordered_hessians[mask]
    )
    hist_right = build(
        sample_indices[~mask], ordered_gradients[~mask], ordered_hessians[~mask]
    )

    # parent - right should equal left, and parent - left should equal right.
    # The first histogram argument is a copy of the parent and is the one
    # compared afterwards.
    hist_left_sub = np.copy(hist_parent)
    hist_right_sub = np.copy(hist_parent)
    _subtract_histograms(0, n_bins, hist_left_sub, hist_right)
    _subtract_histograms(0, n_bins, hist_right_sub, hist_left)

    for field in ("count", "sum_hessians", "sum_gradients"):
        assert_allclose(hist_left[field], hist_left_sub[field], rtol=1e-6)
        assert_allclose(hist_right[field], hist_right_sub[field], rtol=1e-6)