Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\sklearn\ensemble\_hist_gradient_boosting\tests\test_histogram.py with huggingface_hub
Browse files
edit//Qwen3-TTS-test//.venv//Lib//site-packages//sklearn//ensemble//_hist_gradient_boosting//tests//test_histogram.py
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
from numpy.testing import assert_allclose, assert_array_equal
|
| 4 |
+
|
| 5 |
+
from sklearn.ensemble._hist_gradient_boosting.common import (
|
| 6 |
+
G_H_DTYPE,
|
| 7 |
+
HISTOGRAM_DTYPE,
|
| 8 |
+
X_BINNED_DTYPE,
|
| 9 |
+
)
|
| 10 |
+
from sklearn.ensemble._hist_gradient_boosting.histogram import (
|
| 11 |
+
_build_histogram,
|
| 12 |
+
_build_histogram_naive,
|
| 13 |
+
_build_histogram_no_hessian,
|
| 14 |
+
_build_histogram_root,
|
| 15 |
+
_build_histogram_root_no_hessian,
|
| 16 |
+
_subtract_histograms,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@pytest.mark.parametrize("build_func", [_build_histogram_naive, _build_histogram])
def test_build_histogram(build_func):
    """Compare a histogram builder with hand-computed per-bin statistics.

    The same 8-sample binned feature is histogrammed from two different sets
    of sample indices; counts, gradient sums and hessian sums of the three
    bins are checked against literal expected values.
    """
    binned_feature = np.array([0, 2, 0, 1, 2, 0, 2, 1], dtype=X_BINNED_DTYPE)

    scenarios = [
        # Small sample_indices (below unrolling threshold)
        {
            "indices": [0, 2, 3],
            "gradients": [0, 1, 3],
            "hessians": [1, 1, 2],
            "count": [2, 1, 0],
            "sum_gradients": [1, 3, 0],
            "sum_hessians": [2, 2, 0],
        },
        # Larger sample_indices (above unrolling threshold)
        {
            "indices": [0, 2, 3, 6, 7],
            "gradients": [0, 1, 3, 0, 1],
            "hessians": [1, 1, 2, 1, 0],
            "count": [2, 2, 1],
            "sum_gradients": [1, 4, 0],
            "sum_hessians": [2, 2, 1],
        },
    ]

    for scenario in scenarios:
        ordered_gradients = np.array(scenario["gradients"], dtype=G_H_DTYPE)
        ordered_hessians = np.array(scenario["hessians"], dtype=G_H_DTYPE)
        sample_indices = np.array(scenario["indices"], dtype=np.uint32)

        # One feature row, three bins; the builder fills it in place.
        histograms = np.zeros((1, 3), dtype=HISTOGRAM_DTYPE)
        build_func(
            0,
            sample_indices,
            binned_feature,
            ordered_gradients,
            ordered_hessians,
            histograms,
        )

        feature_hist = histograms[0]
        assert_array_equal(feature_hist["count"], scenario["count"])
        assert_allclose(feature_hist["sum_gradients"], scenario["sum_gradients"])
        assert_allclose(feature_hist["sum_hessians"], scenario["sum_hessians"])
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def test_histogram_sample_order_independence():
    """Check that the order of the samples does not affect the histograms.

    Histograms are built with and without hessians from a random subsample,
    then rebuilt after applying one permutation to the sample indices and
    their ordered gradients/hessians. Both versions must agree.
    """
    rng = np.random.RandomState(42)
    n_bins = 256
    n_samples = 1000
    n_sub_samples = 100

    def new_histogram():
        # Single-feature histogram buffer that the builders fill in place.
        return np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)

    # All random draws happen up front; the builders do not use the rng.
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=X_BINNED_DTYPE)
    candidate_indices = np.arange(n_samples, dtype=np.uint32)
    sample_indices = rng.choice(candidate_indices, n_sub_samples, replace=False)
    ordered_gradients = rng.randn(n_sub_samples).astype(G_H_DTYPE)
    ordered_hessians = rng.exponential(size=n_sub_samples).astype(G_H_DTYPE)
    permutation = rng.permutation(n_sub_samples)

    # Reference histograms, in the sampled order.
    hist_gc = new_histogram()
    _build_histogram_no_hessian(
        0, sample_indices, binned_feature, ordered_gradients, hist_gc
    )
    hist_ghc = new_histogram()
    _build_histogram(
        0, sample_indices, binned_feature, ordered_gradients, ordered_hessians, hist_ghc
    )

    # Same samples, visited in a shuffled order.
    shuffled_indices = sample_indices[permutation]
    shuffled_gradients = ordered_gradients[permutation]
    shuffled_hessians = ordered_hessians[permutation]

    hist_gc_perm = new_histogram()
    _build_histogram_no_hessian(
        0, shuffled_indices, binned_feature, shuffled_gradients, hist_gc_perm
    )
    hist_ghc_perm = new_histogram()
    _build_histogram(
        0,
        shuffled_indices,
        binned_feature,
        shuffled_gradients,
        shuffled_hessians,
        hist_ghc_perm,
    )

    gc, gc_perm = hist_gc[0], hist_gc_perm[0]
    ghc, ghc_perm = hist_ghc[0], hist_ghc_perm[0]

    assert_allclose(gc["sum_gradients"], gc_perm["sum_gradients"])
    assert_array_equal(gc["count"], gc_perm["count"])

    assert_allclose(ghc["sum_gradients"], ghc_perm["sum_gradients"])
    assert_allclose(ghc["sum_hessians"], ghc_perm["sum_hessians"])
    assert_array_equal(ghc["count"], ghc_perm["count"])
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@pytest.mark.parametrize("constant_hessian", [True, False])
def test_unrolled_equivalent_to_naive(constant_hessian):
    """Check that every histogram builder agrees with the naive one.

    Parameters
    ----------
    constant_hessian : bool
        When True the hessians are all ones, otherwise they are drawn from a
        log-normal distribution.

    All samples are used (``sample_indices`` is ``arange(n_samples)``), so
    the root builders, which are called without sample indices, can be
    compared with the indexed builders and with the naive reference.
    """
    rng = np.random.RandomState(42)
    n_samples = 10
    n_bins = 5
    sample_indices = np.arange(n_samples).astype(np.uint32)
    # Use the shared binned dtype constant rather than a hard-coded uint8 so
    # this test stays in sync with the other tests of this module.
    # NOTE: randint's upper bound is exclusive, so the last bin is never
    # populated here.
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=X_BINNED_DTYPE)
    ordered_gradients = rng.randn(n_samples).astype(G_H_DTYPE)
    if constant_hessian:
        ordered_hessians = np.ones(n_samples, dtype=G_H_DTYPE)
    else:
        ordered_hessians = rng.lognormal(size=n_samples).astype(G_H_DTYPE)

    # One single-feature buffer per builder; each is filled in place.
    hist_gc_root = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    hist_ghc_root = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    hist_gc = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    hist_ghc = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    hist_naive = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)

    _build_histogram_root_no_hessian(0, binned_feature, ordered_gradients, hist_gc_root)
    _build_histogram_root(
        0, binned_feature, ordered_gradients, ordered_hessians, hist_ghc_root
    )
    _build_histogram_no_hessian(
        0, sample_indices, binned_feature, ordered_gradients, hist_gc
    )
    _build_histogram(
        0, sample_indices, binned_feature, ordered_gradients, ordered_hessians, hist_ghc
    )
    _build_histogram_naive(
        0,
        sample_indices,
        binned_feature,
        ordered_gradients,
        ordered_hessians,
        hist_naive,
    )

    hist_naive = hist_naive[0]
    hist_gc_root = hist_gc_root[0]
    hist_ghc_root = hist_ghc_root[0]
    hist_gc = hist_gc[0]
    hist_ghc = hist_ghc[0]

    # Counts and gradient sums must match the naive reference for all builders.
    for hist in (hist_gc_root, hist_ghc_root, hist_gc, hist_ghc):
        assert_array_equal(hist["count"], hist_naive["count"])
        assert_allclose(hist["sum_gradients"], hist_naive["sum_gradients"])
    # Builders that receive hessians must also reproduce the hessian sums.
    for hist in (hist_ghc_root, hist_ghc):
        assert_allclose(hist["sum_hessians"], hist_naive["sum_hessians"])
    # The no-hessian builders are expected to leave sum_hessians at zero.
    for hist in (hist_gc_root, hist_gc):
        assert_array_equal(hist["sum_hessians"], np.zeros(n_bins))
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
@pytest.mark.parametrize("constant_hessian", [True, False])
def test_hist_subtraction(constant_hessian):
    """Check that histogram subtraction matches directly built histograms.

    Parameters
    ----------
    constant_hessian : bool
        When True the no-hessian builder is used and hessians are all ones,
        otherwise the full builder is used with log-normal hessians.

    A parent histogram is built from all samples, the samples are randomly
    split into a left and a right child, and each child histogram built
    directly must equal the parent histogram minus its sibling's histogram.
    """
    rng = np.random.RandomState(42)
    n_samples = 10
    n_bins = 5
    sample_indices = np.arange(n_samples).astype(np.uint32)
    # Use the shared binned dtype constant rather than a hard-coded uint8 so
    # this test stays in sync with the other tests of this module.
    # NOTE: randint's upper bound is exclusive, so the last bin is never
    # populated here.
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=X_BINNED_DTYPE)
    ordered_gradients = rng.randn(n_samples).astype(G_H_DTYPE)
    if constant_hessian:
        ordered_hessians = np.ones(n_samples, dtype=G_H_DTYPE)
    else:
        ordered_hessians = rng.lognormal(size=n_samples).astype(G_H_DTYPE)

    def build_hist(indices, gradients, hessians):
        # Build a fresh single-feature histogram for the given samples, using
        # the builder that corresponds to the hessian setting under test.
        hist = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
        if constant_hessian:
            _build_histogram_no_hessian(0, indices, binned_feature, gradients, hist)
        else:
            _build_histogram(0, indices, binned_feature, gradients, hessians, hist)
        return hist

    hist_parent = build_hist(sample_indices, ordered_gradients, ordered_hessians)

    # Random left/right split of the parent's samples.
    mask = rng.randint(0, 2, n_samples).astype(bool)

    hist_left = build_hist(
        sample_indices[mask], ordered_gradients[mask], ordered_hessians[mask]
    )
    hist_right = build_hist(
        sample_indices[~mask], ordered_gradients[~mask], ordered_hessians[~mask]
    )

    # Subtraction happens in place on copies of the parent histogram:
    # parent - right should give left, and parent - left should give right.
    hist_left_sub = np.copy(hist_parent)
    hist_right_sub = np.copy(hist_parent)
    _subtract_histograms(0, n_bins, hist_left_sub, hist_right)
    _subtract_histograms(0, n_bins, hist_right_sub, hist_left)

    for key in ("count", "sum_hessians", "sum_gradients"):
        assert_allclose(hist_left[key], hist_left_sub[key], rtol=1e-6)
        assert_allclose(hist_right[key], hist_right_sub[key], rtol=1e-6)
|