| # Author: Nelle Varoquaux, Andrew Tulloch, Antony Lee | |
| # Uses the pool adjacent violators algorithm (PAVA), with the | |
| # enhancement of searching for the longest decreasing subsequence to | |
| # pool at each step. | |
| import numpy as np | |
| from cython cimport floating | |
| def _inplace_contiguous_isotonic_regression(floating[::1] y, floating[::1] w): | |
| cdef: | |
| Py_ssize_t n = y.shape[0], i, k | |
| floating prev_y, sum_wy, sum_w | |
| Py_ssize_t[::1] target = np.arange(n, dtype=np.intp) | |
| # target describes a list of blocks. At any time, if [i..j] (inclusive) is | |
| # an active block, then target[i] := j and target[j] := i. | |
| # For "active" indices (block starts): | |
| # w[i] := sum{w_orig[j], j=[i..target[i]]} | |
| # y[i] := sum{y_orig[j]*w_orig[j], j=[i..target[i]]} / w[i] | |
| with nogil: | |
| i = 0 | |
| while i < n: | |
| k = target[i] + 1 | |
| if k == n: | |
| break | |
| if y[i] < y[k]: | |
| i = k | |
| continue | |
| sum_wy = w[i] * y[i] | |
| sum_w = w[i] | |
| while True: | |
| # We are within a decreasing subsequence. | |
| prev_y = y[k] | |
| sum_wy += w[k] * y[k] | |
| sum_w += w[k] | |
| k = target[k] + 1 | |
| if k == n or prev_y < y[k]: | |
| # Non-singleton decreasing subsequence is finished, | |
| # update first entry. | |
| y[i] = sum_wy / sum_w | |
| w[i] = sum_w | |
| target[i] = k - 1 | |
| target[k - 1] = i | |
| if i > 0: | |
| # Backtrack if we can. This makes the algorithm | |
| # single-pass and ensures O(n) complexity. | |
| i = target[i - 1] | |
| # Otherwise, restart from the same point. | |
| break | |
| # Reconstruct the solution. | |
| i = 0 | |
| while i < n: | |
| k = target[i] + 1 | |
| y[i + 1 : k] = y[i] | |
| i = k | |
| def _make_unique(const floating[::1] X, | |
| const floating[::1] y, | |
| const floating[::1] sample_weights): | |
| """Average targets for duplicate X, drop duplicates. | |
| Aggregates duplicate X values into a single X value where | |
| the target y is a (sample_weighted) average of the individual | |
| targets. | |
| Assumes that X is ordered, so that all duplicates follow each other. | |
| """ | |
| unique_values = len(np.unique(X)) | |
| if floating is float: | |
| dtype = np.float32 | |
| else: | |
| dtype = np.float64 | |
| cdef floating[::1] y_out = np.empty(unique_values, dtype=dtype) | |
| cdef floating[::1] x_out = np.empty_like(y_out) | |
| cdef floating[::1] weights_out = np.empty_like(y_out) | |
| cdef floating current_x = X[0] | |
| cdef floating current_y = 0 | |
| cdef floating current_weight = 0 | |
| cdef int i = 0 | |
| cdef int j | |
| cdef floating x | |
| cdef int n_samples = len(X) | |
| cdef floating eps = np.finfo(dtype).resolution | |
| for j in range(n_samples): | |
| x = X[j] | |
| if x - current_x >= eps: | |
| # next unique value | |
| x_out[i] = current_x | |
| weights_out[i] = current_weight | |
| y_out[i] = current_y / current_weight | |
| i += 1 | |
| current_x = x | |
| current_weight = sample_weights[j] | |
| current_y = y[j] * sample_weights[j] | |
| else: | |
| current_weight += sample_weights[j] | |
| current_y += y[j] * sample_weights[j] | |
| x_out[i] = current_x | |
| weights_out[i] = current_weight | |
| y_out[i] = current_y / current_weight | |
| return( | |
| np.asarray(x_out[:i+1]), | |
| np.asarray(y_out[:i+1]), | |
| np.asarray(weights_out[:i+1]), | |
| ) | |