| |
| import numpy as np |
|
|
| def arange_multi(starts, stops=None, lengths=None): |
| """ |
| Create concatenated ranges of integers for multiple start/length. |
| |
| Parameters |
| ---------- |
| starts : numpy.ndarray |
| Starts for each range |
| stops : numpy.ndarray |
| Stops for each range |
| lengths : numpy.ndarray |
| Lengths for each range. Either stops or lengths must be provided. |
| |
| Returns |
| ------- |
| concat_ranges : numpy.ndarray |
| Concatenated ranges. |
| |
| Notes |
| ----- |
| See the following illustrative example: |
| |
| starts = np.array([1, 3, 4, 6]) |
| stops = np.array([1, 5, 7, 6]) |
| |
| print arange_multi(starts, lengths) |
| >>> [3 4 4 5 6] |
| |
| From: https://codereview.stackexchange.com/questions/83018/vectorized-numpy-version-of-arange-with-multiple-start-stop |
| |
| """ |
|
|
| if (stops is None) == (lengths is None): |
| raise ValueError("Either stops or lengths must be provided!") |
|
|
| if lengths is None: |
| lengths = stops - starts |
|
|
| if np.isscalar(starts): |
| starts = np.full(len(stops), starts) |
|
|
| |
| cat_start = np.repeat(starts, lengths) |
|
|
| |
| cat_counter = np.arange(lengths.sum()) - np.repeat( |
| lengths.cumsum() - lengths, lengths |
| ) |
|
|
| |
| cat_range = cat_start + cat_counter |
|
|
| return cat_range |
|
|
| def overlap_intervals(starts1, ends1, starts2, ends2, closed=False, sort=False): |
| """ |
| Take two sets of intervals and return the indices of pairs of overlapping intervals. |
| |
| Parameters |
| ---------- |
| starts1, ends1, starts2, ends2 : numpy.ndarray |
| Interval coordinates. Warning: if provided as pandas.Series, indices |
| will be ignored. |
| |
| closed : bool |
| If True, then treat intervals as closed and report single-point overlaps. |
| Returns |
| ------- |
| overlap_ids : numpy.ndarray |
| An Nx2 array containing the indices of pairs of overlapping intervals. |
| The 1st column contains ids from the 1st set, the 2nd column has ids |
| from the 2nd set. |
| |
| """ |
|
|
| |
| starts1 = np.asarray(starts1) |
| ends1 = np.asarray(ends1) |
| starts2 = np.asarray(starts2) |
| ends2 = np.asarray(ends2) |
|
|
| |
| n1 = len(starts1) |
| n2 = len(starts2) |
| ids1 = np.arange(0, n1) |
| ids2 = np.arange(0, n2) |
|
|
| |
| order1 = np.lexsort([ends1, starts1]) |
| order2 = np.lexsort([ends2, starts2]) |
| starts1, ends1, ids1 = starts1[order1], ends1[order1], ids1[order1] |
| starts2, ends2, ids2 = starts2[order2], ends2[order2], ids2[order2] |
|
|
| |
| match_2in1_starts = np.searchsorted(starts2, starts1, "left") |
| match_2in1_ends = np.searchsorted(starts2, ends1, "right" if closed else "left") |
| |
| match_1in2_starts = np.searchsorted(starts1, starts2, "right") |
| match_1in2_ends = np.searchsorted(starts1, ends2, "right" if closed else "left") |
|
|
| |
| match_2in1_mask = match_2in1_ends > match_2in1_starts |
| match_1in2_mask = match_1in2_ends > match_1in2_starts |
| match_2in1_starts, match_2in1_ends = ( |
| match_2in1_starts[match_2in1_mask], |
| match_2in1_ends[match_2in1_mask], |
| ) |
| match_1in2_starts, match_1in2_ends = ( |
| match_1in2_starts[match_1in2_mask], |
| match_1in2_ends[match_1in2_mask], |
| ) |
|
|
| |
| overlap_ids = np.block( |
| [ |
| [ |
| np.repeat(ids1[match_2in1_mask], match_2in1_ends - match_2in1_starts)[ |
| :, None |
| ], |
| ids2[arange_multi(match_2in1_starts, match_2in1_ends)][:, None], |
| ], |
| [ |
| ids1[arange_multi(match_1in2_starts, match_1in2_ends)][:, None], |
| np.repeat(ids2[match_1in2_mask], match_1in2_ends - match_1in2_starts)[ |
| :, None |
| ], |
| ], |
| ] |
| ) |
|
|
| if sort: |
| |
| overlap_ids = overlap_ids[np.lexsort([overlap_ids[:, 1], overlap_ids[:, 0]])] |
|
|
| return overlap_ids |