Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_constructors.py +78 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_formats.py +163 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_indexing.py +104 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_pickle.py +11 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_reshape.py +97 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_setops.py +266 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_where.py +13 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py +89 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py +181 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py +45 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py +46 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py +53 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py +315 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py +103 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_astype.py +254 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_constructors.py +535 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_equals.py +36 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_formats.py +119 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_indexing.py +674 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval.py +918 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_range.py +369 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_tree.py +208 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_join.py +44 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_pickle.py +13 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_setops.py +208 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/conftest.py +27 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_analytics.py +263 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_astype.py +30 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_constructors.py +860 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_conversion.py +201 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_copy.py +96 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_drop.py +190 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_equivalence.py +284 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_formats.py +249 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_integrity.py +289 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_join.py +268 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_lexsort.py +46 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_monotonic.py +188 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reindex.py +174 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reshape.py +224 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_setops.py +772 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_sorting.py +349 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_take.py +78 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_astype.py +95 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_indexing.py +611 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_constructors.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pandas import (
|
| 6 |
+
Index,
|
| 7 |
+
MultiIndex,
|
| 8 |
+
Series,
|
| 9 |
+
)
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestIndexConstructor:
    """Tests for the ``Index`` constructor.

    Specifically for cases that do not return a subclass.
    """

    @pytest.mark.parametrize("value", [1, np.int64(1)])
    def test_constructor_corner(self, value):
        """Passing a scalar instead of a collection raises ``TypeError``."""
        # corner case
        msg = (
            r"Index\(\.\.\.\) must be called with a collection of some "
            f"kind, {value} was passed"
        )
        with pytest.raises(TypeError, match=msg):
            Index(value)

    @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
    def test_construction_list_mixed_tuples(self, index_vals):
        """A list mixing tuples and scalars yields a plain ``Index``."""
        # see gh-10697: if we are constructing from a mixed list of tuples,
        # make sure that we are independent of the sorting order.
        index = Index(index_vals)
        assert isinstance(index, Index)
        assert not isinstance(index, MultiIndex)

    def test_constructor_cast(self):
        """Non-numeric strings cannot be cast to a float dtype."""
        msg = "could not convert string to float"
        with pytest.raises(ValueError, match=msg):
            Index(["a", "b", "c"], dtype=float)

    @pytest.mark.parametrize("tuple_list", [[()], [(), ()]])
    def test_construct_empty_tuples(self, tuple_list):
        """A list of empty tuples matches ``MultiIndex.from_tuples``."""
        # GH #45608
        result = Index(tuple_list)
        expected = MultiIndex.from_tuples(tuple_list)

        tm.assert_index_equal(result, expected)

    def test_index_string_inference(self):
        """With ``future.infer_string`` only all-string data infers string dtype."""
        # GH#54430
        expected = Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan))
        with pd.option_context("future.infer_string", True):
            ser = Index(["a", "b"])
        tm.assert_index_equal(ser, expected)

        # mixed str/int data stays object dtype
        expected = Index(["a", 1], dtype="object")
        with pd.option_context("future.infer_string", True):
            ser = Index(["a", 1])
        tm.assert_index_equal(ser, expected)

    def test_inference_on_pandas_objects(self):
        """Re-wrapping object-dtype pandas objects warns and infers a dtype."""
        # GH#56012
        idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
        with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
            result = Index(idx)
        assert result.dtype != np.object_

        ser = Series([pd.Timestamp("2019-12-31")], dtype=object)

        with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
            result = Index(ser)
        assert result.dtype != np.object_

    def test_constructor_not_read_only(self):
        """An Index built from a Series under copy-on-write has writeable values."""
        # GH#57130
        ser = Series([1, 2], dtype=object)
        with pd.option_context("mode.copy_on_write", True):
            idx = Index(ser)
            assert idx._values.flags.writeable
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_formats.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas._config import using_string_dtype
|
| 5 |
+
import pandas._config.config as cf
|
| 6 |
+
|
| 7 |
+
from pandas import Index
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class TestIndexRendering:
    """Tests for the repr and ``format`` output of the base ``Index`` class."""

    def test_repr_is_valid_construction_code(self):
        """Evaluating the repr of an Index rebuilds an equal Index."""
        # for the case of Index, where the repr is traditional rather than
        # stylized
        idx = Index(["a", "b"])
        # eval is only ever applied to the repr of the index built just above
        res = eval(repr(idx))
        tm.assert_index_equal(res, idx)

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # ASCII
            # short
            (
                Index(["a", "bb", "ccc"]),
                """Index(['a', 'bb', 'ccc'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["a", "bb", "ccc"] * 10),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object')",
            ),
            # truncated
            (
                Index(["a", "bb", "ccc"] * 100),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n"
                "       ...\n"
                "       'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object', length=300)",
            ),
            # Non-ASCII
            # short
            (
                Index(["あ", "いい", "ううう"]),
                """Index(['あ', 'いい', 'ううう'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう'],\n"
                    "      dtype='object')"
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr(self, index, expected):
        """The repr of a string Index matches the expected layout.

        Covers short, multi-line and truncated output for ASCII and
        non-ASCII values. In the expected strings, continuation lines are
        indented by 7 spaces and the ``dtype=`` line by 6.
        """
        result = repr(index)
        assert result == expected

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # short
            (
                Index(["あ", "いい", "ううう"]),
                ("Index(['あ', 'いい', 'ううう'], dtype='object')"),
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう'],\n"
                    "      dtype='object')"
                    ""
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', "
                    "'いい', 'ううう', 'あ', 'いい',\n"
                    "       'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr_with_unicode_option(self, index, expected):
        """The repr matches the expected layout with East Asian width enabled."""
        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            result = repr(index)
            assert result == expected

    def test_repr_summary(self):
        """A long Index is summarised with an ellipsis under ``max_seq_items``."""
        with cf.option_context("display.max_seq_items", 10):
            result = repr(Index(np.arange(1000)))
            assert len(result) < 200
            assert "..." in result

    def test_summary_bug(self):
        """``_summary`` leaves format-like characters in labels untouched."""
        # GH#3869
        ind = Index(["{other}%s", "~:{range}:0"], name="A")
        result = ind._summary()
        # shouldn't be formatted accidentally.
        assert "~:{range}:0" in result
        assert "{other}%s" in result

    def test_index_repr_bool_nan(self):
        """``format`` and ``repr`` render an object Index of bools and NaN."""
        # GH32146
        arr = Index([True, False, np.nan], dtype=object)
        msg = "Index.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            exp1 = arr.format()
        out1 = ["True", "False", "NaN"]
        assert out1 == exp1

        exp2 = repr(arr)
        out2 = "Index([True, False, nan], dtype='object')"
        assert out2 == exp2

    def test_format_different_scalar_lengths(self):
        """``format`` returns the labels unchanged when their lengths differ."""
        # GH#35439
        idx = Index(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"Index\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_indexing.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas._libs import index as libindex
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import (
|
| 8 |
+
Index,
|
| 9 |
+
NaT,
|
| 10 |
+
)
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class TestGetSliceBounds:
    """Tests for ``Index.get_slice_bound`` on a string-labelled Index."""

    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_within(self, side, expected):
        """A label present in the index gives position 4 (left) or 5 (right)."""
        index = Index(list("abcdef"))
        result = index.get_slice_bound("e", side=side)
        assert result == expected

    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize(
        "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)]
    )
    def test_get_slice_bounds_outside(self, side, expected, data, bound):
        """A label beyond either end gives the same bound for both sides."""
        index = Index(data)
        result = index.get_slice_bound(bound, side=side)
        assert result == expected

    def test_get_slice_bounds_invalid_side(self):
        """An unrecognised ``side`` value raises ``ValueError``."""
        with pytest.raises(ValueError, match="Invalid value for side kwarg"):
            Index([]).get_slice_bound("a", side="middle")
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class TestGetIndexerNonUnique:
    """Tests for ``Index.get_indexer_non_unique``."""

    def test_get_indexer_non_unique_dtype_mismatch(self):
        """An integer target against a string Index is reported as missing."""
        # GH#25459
        indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0]))
        tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)

    @pytest.mark.parametrize(
        "idx_values,idx_non_unique",
        [
            ([np.nan, 100, 200, 100], [np.nan, 100]),
            ([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]),
        ],
    )
    def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique):
        """NaN and duplicated values are located with nothing missing."""
        # a NaN-only target matches the single NaN at position 0
        indexes, missing = Index(idx_values).get_indexer_non_unique(Index([np.nan]))
        tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)

        # NaN plus the duplicated value 100 matches positions 0, 1 and 3
        indexes, missing = Index(idx_values).get_indexer_non_unique(
            Index(idx_non_unique)
        )
        tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class TestGetLoc:
    """Tests for ``Index.get_loc`` on object-dtype indexes."""

    @pytest.mark.slow  # to_flat_index takes a while
    def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
        """``get_loc`` finds a tuple label when the index exceeds ``_SIZE_CUTOFF``."""
        # Go through the libindex path for which using
        # _bin_search vs ndarray.searchsorted makes a difference

        # Patch through the object yielded by ``context()`` so the lowered
        # cutoff is undone as soon as the ``with`` block exits.
        with monkeypatch.context() as mp:
            mp.setattr(libindex, "_SIZE_CUTOFF", 100)
            lev = list("ABCD")
            dti = pd.date_range("2016-01-01", periods=10)

            mi = pd.MultiIndex.from_product([lev, range(5), dti])
            oidx = mi.to_flat_index()

            loc = len(oidx) // 2
            tup = oidx[loc]

            res = oidx.get_loc(tup)

        assert res == loc

    def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
        """``np.nan``, ``None`` and ``NaT`` are looked up as distinct keys."""
        # case that goes through _maybe_get_bool_indexer
        idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)

        # we dont raise KeyError on nan
        res = idx.get_loc(np.nan)
        assert res == 1

        # we only match on None, not on np.nan
        res = idx.get_loc(None)
        expected = np.array([False, False, True, False, False, True])
        tm.assert_numpy_array_equal(res, expected)

        # we don't match at all on mismatched NA
        with pytest.raises(KeyError, match="NaT"):
            idx.get_loc(NaT)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def test_getitem_boolean_ea_indexer():
    """Indexing an Index with a nullable-boolean Series keeps only ``True`` rows.

    The mask is ``[True, False, pd.NA]``, and the expected result contains
    only position 0.
    """
    # GH#45806
    ser = pd.Series([True, False, pd.NA], dtype="boolean")
    result = ser.index[ser]
    expected = Index([0])
    tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_pickle.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pandas import Index
|
| 2 |
+
import pandas._testing as tm
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def test_pickle_preserves_object_dtype():
    """An object-dtype Index of integers keeps its dtype through a pickle round trip."""
    # GH#43188, GH#43155 don't infer numeric dtype
    index = Index([1, 2, 3], dtype=object)

    result = tm.round_trip_pickle(index)
    assert result.dtype == object
    tm.assert_index_equal(index, result)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_reshape.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for ndarray-like method on the base Index class
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from pandas import Index
|
| 9 |
+
import pandas._testing as tm
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TestReshape:
    """Tests for ``repeat``, ``insert``, ``delete`` and ``append`` on ``Index``."""

    def test_repeat(self):
        """``repeat(2)`` duplicates every element in place."""
        repeats = 2
        index = Index([1, 2, 3])
        expected = Index([1, 1, 2, 2, 3, 3])

        result = index.repeat(repeats)
        tm.assert_index_equal(result, expected)

    def test_insert(self):
        """``insert`` accepts positive and negative locations and an empty Index."""
        # GH 7256
        # validate neg/pos inserts
        result = Index(["b", "c", "d"])

        # test 0th element
        tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))

        # test Nth element that follows Python list behavior
        tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))

        # test loc +/- neq (0, -1)
        tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))

        # test empty
        null_index = Index([])
        tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))

    def test_insert_missing(self, request, nulls_fixture, using_infer_string):
        """Inserting a missing value into an object Index keeps that exact value."""
        if using_infer_string and nulls_fixture is pd.NA:
            request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
        # GH#22295
        # test there is no mangling of NA values
        expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
        result = Index(list("abc"), dtype=object).insert(
            1, Index([nulls_fixture], dtype=object)
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")]
    )
    @pytest.mark.parametrize("loc", [-1, 2])
    def test_insert_datetime_into_object(self, loc, val):
        """A tuple, datetime64 or timedelta64 can be inserted into a string Index."""
        # GH#44509
        idx = Index(["1", "2", "3"])
        result = idx.insert(loc, val)
        expected = Index(["1", "2", val, "3"])
        tm.assert_index_equal(result, expected)
        # NOTE(review): this inspects ``expected`` rather than ``result``;
        # confirm whether ``result[2]`` was the intended target.
        assert type(expected[2]) is type(val)

    def test_insert_none_into_string_numpy(self, string_dtype_no_object):
        """Inserting ``None`` into a string-dtype Index keeps the dtype."""
        # GH#55365
        index = Index(["a", "b", "c"], dtype=string_dtype_no_object)
        result = index.insert(-1, None)
        expected = Index(["a", "b", None, "c"], dtype=string_dtype_no_object)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "pos,expected",
        [
            (0, Index(["b", "c", "d"], name="index")),
            (-1, Index(["a", "b", "c"], name="index")),
        ],
    )
    def test_delete(self, pos, expected):
        """``delete`` drops the element at ``pos`` and keeps the index name."""
        index = Index(["a", "b", "c", "d"], name="index")
        result = index.delete(pos)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    def test_delete_raises(self):
        """Deleting an out-of-bounds position raises ``IndexError``."""
        index = Index(["a", "b", "c", "d"], name="index")
        msg = "index 5 is out of bounds for axis 0 with size 4"
        with pytest.raises(IndexError, match=msg):
            index.delete(5)

    def test_append_multiple(self):
        """``append`` accepts a list of indexes, including an empty list."""
        index = Index(["a", "b", "c", "d", "e", "f"])

        foos = [index[:2], index[2:4], index[4:]]
        result = foos[0].append(foos[1:])
        tm.assert_index_equal(result, index)

        # empty
        result = index.append([])
        tm.assert_index_equal(result, index)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_setops.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import (
|
| 8 |
+
Index,
|
| 9 |
+
Series,
|
| 10 |
+
)
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
from pandas.core.algorithms import safe_sort
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def equal_contents(arr1, arr2) -> bool:
    """
    Checks if the set of unique elements of arr1 and arr2 are equivalent.

    Order and multiplicity are ignored because both inputs are converted to
    ``frozenset`` before comparison, so every element must be hashable.
    """
    return frozenset(arr1) == frozenset(arr2)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class TestIndexSetOps:
|
| 23 |
+
@pytest.mark.parametrize(
|
| 24 |
+
"method", ["union", "intersection", "difference", "symmetric_difference"]
|
| 25 |
+
)
|
| 26 |
+
def test_setops_sort_validation(self, method):
|
| 27 |
+
idx1 = Index(["a", "b"])
|
| 28 |
+
idx2 = Index(["b", "c"])
|
| 29 |
+
|
| 30 |
+
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
|
| 31 |
+
getattr(idx1, method)(idx2, sort=2)
|
| 32 |
+
|
| 33 |
+
# sort=True is supported as of GH#??
|
| 34 |
+
getattr(idx1, method)(idx2, sort=True)
|
| 35 |
+
|
| 36 |
+
def test_setops_preserve_object_dtype(self):
|
| 37 |
+
idx = Index([1, 2, 3], dtype=object)
|
| 38 |
+
result = idx.intersection(idx[1:])
|
| 39 |
+
expected = idx[1:]
|
| 40 |
+
tm.assert_index_equal(result, expected)
|
| 41 |
+
|
| 42 |
+
# if other is not monotonic increasing, intersection goes through
|
| 43 |
+
# a different route
|
| 44 |
+
result = idx.intersection(idx[1:][::-1])
|
| 45 |
+
tm.assert_index_equal(result, expected)
|
| 46 |
+
|
| 47 |
+
result = idx._union(idx[1:], sort=None)
|
| 48 |
+
expected = idx
|
| 49 |
+
tm.assert_numpy_array_equal(result, expected.values)
|
| 50 |
+
|
| 51 |
+
result = idx.union(idx[1:], sort=None)
|
| 52 |
+
tm.assert_index_equal(result, expected)
|
| 53 |
+
|
| 54 |
+
# if other is not monotonic increasing, _union goes through
|
| 55 |
+
# a different route
|
| 56 |
+
result = idx._union(idx[1:][::-1], sort=None)
|
| 57 |
+
tm.assert_numpy_array_equal(result, expected.values)
|
| 58 |
+
|
| 59 |
+
result = idx.union(idx[1:][::-1], sort=None)
|
| 60 |
+
tm.assert_index_equal(result, expected)
|
| 61 |
+
|
| 62 |
+
def test_union_base(self):
|
| 63 |
+
index = Index([0, "a", 1, "b", 2, "c"])
|
| 64 |
+
first = index[3:]
|
| 65 |
+
second = index[:5]
|
| 66 |
+
|
| 67 |
+
result = first.union(second)
|
| 68 |
+
|
| 69 |
+
expected = Index([0, 1, 2, "a", "b", "c"])
|
| 70 |
+
tm.assert_index_equal(result, expected)
|
| 71 |
+
|
| 72 |
+
@pytest.mark.parametrize("klass", [np.array, Series, list])
|
| 73 |
+
def test_union_different_type_base(self, klass):
|
| 74 |
+
# GH 10149
|
| 75 |
+
index = Index([0, "a", 1, "b", 2, "c"])
|
| 76 |
+
first = index[3:]
|
| 77 |
+
second = index[:5]
|
| 78 |
+
|
| 79 |
+
result = first.union(klass(second.values))
|
| 80 |
+
|
| 81 |
+
assert equal_contents(result, index)
|
| 82 |
+
|
| 83 |
+
def test_union_sort_other_incomparable(self):
|
| 84 |
+
# https://github.com/pandas-dev/pandas/issues/24959
|
| 85 |
+
idx = Index([1, pd.Timestamp("2000")])
|
| 86 |
+
# default (sort=None)
|
| 87 |
+
with tm.assert_produces_warning(RuntimeWarning):
|
| 88 |
+
result = idx.union(idx[:1])
|
| 89 |
+
|
| 90 |
+
tm.assert_index_equal(result, idx)
|
| 91 |
+
|
| 92 |
+
# sort=None
|
| 93 |
+
with tm.assert_produces_warning(RuntimeWarning):
|
| 94 |
+
result = idx.union(idx[:1], sort=None)
|
| 95 |
+
tm.assert_index_equal(result, idx)
|
| 96 |
+
|
| 97 |
+
# sort=False
|
| 98 |
+
result = idx.union(idx[:1], sort=False)
|
| 99 |
+
tm.assert_index_equal(result, idx)
|
| 100 |
+
|
| 101 |
+
def test_union_sort_other_incomparable_true(self):
|
| 102 |
+
idx = Index([1, pd.Timestamp("2000")])
|
| 103 |
+
with pytest.raises(TypeError, match=".*"):
|
| 104 |
+
idx.union(idx[:1], sort=True)
|
| 105 |
+
|
| 106 |
+
def test_intersection_equal_sort_true(self):
|
| 107 |
+
idx = Index(["c", "a", "b"])
|
| 108 |
+
sorted_ = Index(["a", "b", "c"])
|
| 109 |
+
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
|
| 110 |
+
|
| 111 |
+
def test_intersection_base(self, sort):
|
| 112 |
+
# (same results for py2 and py3 but sortedness not tested elsewhere)
|
| 113 |
+
index = Index([0, "a", 1, "b", 2, "c"])
|
| 114 |
+
first = index[:5]
|
| 115 |
+
second = index[:3]
|
| 116 |
+
|
| 117 |
+
expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
|
| 118 |
+
result = first.intersection(second, sort=sort)
|
| 119 |
+
tm.assert_index_equal(result, expected)
|
| 120 |
+
|
| 121 |
+
@pytest.mark.parametrize("klass", [np.array, Series, list])
|
| 122 |
+
def test_intersection_different_type_base(self, klass, sort):
|
| 123 |
+
# GH 10149
|
| 124 |
+
index = Index([0, "a", 1, "b", 2, "c"])
|
| 125 |
+
first = index[:5]
|
| 126 |
+
second = index[:3]
|
| 127 |
+
|
| 128 |
+
result = first.intersection(klass(second.values), sort=sort)
|
| 129 |
+
assert equal_contents(result, second)
|
| 130 |
+
|
| 131 |
+
def test_intersection_nosort(self):
|
| 132 |
+
result = Index(["c", "b", "a"]).intersection(["b", "a"])
|
| 133 |
+
expected = Index(["b", "a"])
|
| 134 |
+
tm.assert_index_equal(result, expected)
|
| 135 |
+
|
| 136 |
+
def test_intersection_equal_sort(self):
|
| 137 |
+
idx = Index(["c", "a", "b"])
|
| 138 |
+
tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
|
| 139 |
+
tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
|
| 140 |
+
|
| 141 |
+
def test_intersection_str_dates(self, sort):
|
| 142 |
+
dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
|
| 143 |
+
|
| 144 |
+
i1 = Index(dt_dates, dtype=object)
|
| 145 |
+
i2 = Index(["aa"], dtype=object)
|
| 146 |
+
result = i2.intersection(i1, sort=sort)
|
| 147 |
+
|
| 148 |
+
assert len(result) == 0
|
| 149 |
+
|
| 150 |
+
@pytest.mark.parametrize(
    "index2,expected_arr",
    [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
)
def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
    """Intersect a non-monotonic, non-unique index with ``index2``."""
    index1 = Index(["A", "B", "A", "C"])
    result = index1.intersection(index2, sort=sort)

    expected = Index(expected_arr)
    if sort is None:
        # default sort: compare against the sorted expectation
        expected = expected.sort_values()
    tm.assert_index_equal(result, expected)
|
| 162 |
+
|
| 163 |
+
def test_difference_base(self, sort):
|
| 164 |
+
# (same results for py2 and py3 but sortedness not tested elsewhere)
|
| 165 |
+
index = Index([0, "a", 1, "b", 2, "c"])
|
| 166 |
+
first = index[:4]
|
| 167 |
+
second = index[3:]
|
| 168 |
+
|
| 169 |
+
result = first.difference(second, sort)
|
| 170 |
+
expected = Index([0, "a", 1])
|
| 171 |
+
if sort is None:
|
| 172 |
+
expected = Index(safe_sort(expected))
|
| 173 |
+
tm.assert_index_equal(result, expected)
|
| 174 |
+
|
| 175 |
+
def test_symmetric_difference(self):
|
| 176 |
+
# (same results for py2 and py3 but sortedness not tested elsewhere)
|
| 177 |
+
index = Index([0, "a", 1, "b", 2, "c"])
|
| 178 |
+
first = index[:4]
|
| 179 |
+
second = index[3:]
|
| 180 |
+
|
| 181 |
+
result = first.symmetric_difference(second)
|
| 182 |
+
expected = Index([0, 1, 2, "a", "c"])
|
| 183 |
+
tm.assert_index_equal(result, expected)
|
| 184 |
+
|
| 185 |
+
@pytest.mark.parametrize(
    "method,expected,sort",
    [
        (
            "intersection",
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                dtype=[("num", int), ("let", "S1")],
            ),
            False,
        ),
        (
            "intersection",
            np.array(
                [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
                dtype=[("num", int), ("let", "S1")],
            ),
            None,
        ),
        (
            "union",
            np.array(
                [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
                dtype=[("num", int), ("let", "S1")],
            ),
            None,
        ),
    ],
)
def test_tuple_union_bug(self, method, expected, sort):
    """Set operations on indexes built from structured arrays stay 1-D."""
    record_dtype = [("num", int), ("let", "S1")]
    small = np.array([(1, "A"), (2, "A"), (1, "B"), (2, "B")], dtype=record_dtype)
    large = np.array(
        [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
        dtype=record_dtype,
    )
    index1 = Index(small)
    index2 = Index(large)

    setop = getattr(index1, method)
    result = setop(index2, sort=sort)
    assert result.ndim == 1

    tm.assert_index_equal(result, Index(expected))
|
| 233 |
+
|
| 234 |
+
@pytest.mark.parametrize("first_list", [["b", "a"], []])
@pytest.mark.parametrize("second_list", [["a", "b"], []])
@pytest.mark.parametrize(
    "first_name, second_name, expected_name",
    [("A", "B", None), (None, "B", None), ("A", None, None)],
)
def test_union_name_preservation(
    self, first_list, second_list, first_name, second_name, expected_name, sort
):
    """Check the name (and values) of a union of two named indexes."""
    left = Index(first_list, name=first_name)
    right = Index(second_list, name=second_name)
    union = left.union(right, sort=sort)

    vals = set(first_list).union(second_list)

    both_non_empty = len(first_list) > 0 and len(second_list) > 0
    if sort is None and both_non_empty:
        # ordering is checked only here
        expected = Index(sorted(vals), name=expected_name)
        tm.assert_index_equal(union, expected)
    else:
        # otherwise compare after sorting both sides
        expected = Index(vals, name=expected_name)
        tm.assert_index_equal(union.sort_values(), expected.sort_values())
|
| 255 |
+
|
| 256 |
+
@pytest.mark.parametrize(
    "diff_type, expected",
    [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
)
def test_difference_object_type(self, diff_type, expected):
    """(Symmetric) difference of two mixed int/str indexes (GH 13432)."""
    left = Index([0, 1, "A", "B"])
    right = Index([0, 2, "A", "C"])

    setop = getattr(left, diff_type)
    result = setop(right)

    tm.assert_index_equal(result, Index(expected))
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_where.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
from pandas import Index
|
| 4 |
+
import pandas._testing as tm
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class TestWhere:
    """``Index.where`` behaviour on the base Index class."""

    def test_where_intlike_str_doesnt_cast_ints(self):
        """Filling with the string "2" must keep it a string, not cast to int."""
        ints = Index(range(3))
        keep = np.array([True, False, True])

        result = ints.where(keep, "2")

        tm.assert_index_equal(result, Index([0, "2", 2]))
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
PeriodIndex,
|
| 6 |
+
Series,
|
| 7 |
+
date_range,
|
| 8 |
+
period_range,
|
| 9 |
+
timedelta_range,
|
| 10 |
+
)
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class DropDuplicates:
    """Shared ``drop_duplicates`` checks; subclasses provide the ``idx`` fixture."""

    def test_drop_duplicates_metadata(self, idx):
        """De-duplicating keeps the values; freq survives only where expected."""
        # GH#10115
        deduped = idx.drop_duplicates()
        tm.assert_index_equal(idx, deduped)
        assert idx.freq == deduped.freq

        doubled = idx.append(idx)
        deduped = doubled.drop_duplicates()

        if isinstance(idx, PeriodIndex):
            expected = idx
            assert deduped.freq == expected.freq
        else:
            # freq is reset except for PeriodIndex
            assert doubled.freq is None
            assert deduped.freq is None
            expected = idx._with_freq(None)

        tm.assert_index_equal(deduped, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            (
                "first",
                np.concatenate(([False] * 10, [True] * 5)),
                np.arange(0, 10, dtype=np.int64),
            ),
            (
                "last",
                np.concatenate(([True] * 5, [False] * 10)),
                np.arange(5, 15, dtype=np.int64),
            ),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10, dtype=np.int64),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        """``duplicated``/``drop_duplicates`` agree between Index and Series."""
        # to check Index/Series compat
        idx = idx.append(idx[:5])

        dup_mask = expected
        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), dup_mask)
        kept = idx[~dup_mask]

        tm.assert_index_equal(idx.drop_duplicates(keep=keep), kept)

        ser_result = Series(idx).drop_duplicates(keep=keep)
        ser_expected = Series(kept, index=index)
        tm.assert_series_equal(ser_result, ser_expected)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class TestDropDuplicatesPeriodIndex(DropDuplicates):
    """Run the shared checks on ``period_range`` indexes of several freqs."""

    @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
    def freq(self, request):
        """Frequency alias for the current parametrization."""
        return request.param

    @pytest.fixture
    def idx(self, freq):
        """Ten-element PeriodIndex named "idx" with the requested freq."""
        return period_range(start="2011-01-01", periods=10, freq=freq, name="idx")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    """Run the shared checks on ``date_range`` indexes."""

    @pytest.fixture
    def idx(self, freq_sample):
        """Ten-element DatetimeIndex named "idx" using ``freq_sample``."""
        return date_range(
            start="2011-01-01", periods=10, freq=freq_sample, name="idx"
        )
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    """Run the shared checks on ``timedelta_range`` indexes."""

    @pytest.fixture
    def idx(self, freq_sample):
        """Ten-element TimedeltaIndex named "idx" using ``freq_sample``."""
        return timedelta_range(
            start="1 day", periods=10, freq=freq_sample, name="idx"
        )
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex
|
| 3 |
+
"""
|
| 4 |
+
from datetime import (
|
| 5 |
+
datetime,
|
| 6 |
+
timedelta,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
import pytest
|
| 11 |
+
|
| 12 |
+
import pandas as pd
|
| 13 |
+
from pandas import (
|
| 14 |
+
CategoricalIndex,
|
| 15 |
+
DatetimeIndex,
|
| 16 |
+
Index,
|
| 17 |
+
PeriodIndex,
|
| 18 |
+
TimedeltaIndex,
|
| 19 |
+
date_range,
|
| 20 |
+
period_range,
|
| 21 |
+
timedelta_range,
|
| 22 |
+
)
|
| 23 |
+
import pandas._testing as tm
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class EqualsTests:
    """``equals`` checks shared by the datetime-like index test classes.

    Subclasses provide the ``index`` fixture.
    """

    def test_not_equals_numeric(self, index):
        """Indexes built from the raw i8 values are never equal."""
        i8 = index.asi8
        assert not index.equals(Index(i8))
        assert not index.equals(Index(i8.astype("u8")))
        assert not index.equals(Index(i8).astype("f8"))

    def test_equals(self, index):
        """Equal to itself and to its object/categorical counterparts."""
        as_object = index.astype(object)
        candidates = (
            index,
            as_object,
            CategoricalIndex(index),
            CategoricalIndex(as_object),
        )
        for other in candidates:
            assert index.equals(other)

    def test_not_equals_non_arraylike(self, index):
        """A plain list of the same elements is not equal."""
        assert not index.equals(list(index))

    def test_not_equals_strings(self, index):
        """String renderings of the elements are not equal."""
        other = Index([str(elem) for elem in index], dtype=object)
        assert not index.equals(other)
        assert not index.equals(CategoricalIndex(other))

    def test_not_equals_misc_strs(self, index):
        """Unrelated strings are not equal."""
        assert not index.equals(Index(list("abc")))
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class TestPeriodIndexEquals(EqualsTests):
    """``equals`` tests for PeriodIndex."""

    @pytest.fixture
    def index(self):
        """Five daily periods starting 2013-01-01."""
        return period_range("2013-01-01", periods=5, freq="D")

    # TODO: de-duplicate with other test_equals2 methods
    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals2(self, freq):
        """Equality is sensitive to container type and to freq (GH#13107)."""
        idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)

        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same strings, hourly freq
        idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="h")
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)

        # same internal i8 values, different freq
        idx3 = PeriodIndex._simple_new(
            idx._values._simple_new(idx._values.asi8, dtype=pd.PeriodDtype("h"))
        )
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        for other in (
            idx3,
            idx3.copy(),
            idx3.astype(object),
            list(idx3),
            pd.Series(idx3),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx3)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class TestDatetimeIndexEquals(EqualsTests):
    """``equals`` tests for DatetimeIndex."""

    @pytest.fixture
    def index(self):
        """Five daily timestamps starting 2013-01-01."""
        return date_range("2013-01-01", periods=5)

    def test_equals2(self):
        """Equality is sensitive to container type and to tz (GH#13107)."""
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])

        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same wall times, tz-aware
        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        for other in (
            idx3,
            idx3.copy(),
            idx3.astype(object),
            list(idx3),
            pd.Series(idx3),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx3)

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        for left in (idx, idx2, idx3):
            assert not left.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        for left in (idx, idx2, idx3):
            assert not left.equals(oob2)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_not_equals_bday(self, freq):
        """A business-day range is not equal to a list of its elements."""
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        assert not rng.equals(list(rng))
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
class TestTimedeltaIndexEquals(EqualsTests):
    """``equals`` tests for TimedeltaIndex."""

    @pytest.fixture
    def index(self):
        """Ten daily timedeltas starting at one day."""
        return timedelta_range("1 day", periods=10)

    def test_equals2(self):
        """Equality is sensitive to container type and element order (GH#13107)."""
        idx = TimedeltaIndex(["1 days", "2 days", "NaT"])

        for same in (idx, idx.copy(), idx.astype(object)):
            assert idx.equals(same)
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same elements, different order
        idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
        for other in (
            idx2,
            idx2.copy(),
            idx2.astype(object),
            list(idx2),
            pd.Series(idx2),
        ):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))

        # Check that we dont raise OverflowError on comparisons outside the
        # implementation range GH#28532
        oob = Index([timedelta(days=10**6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        oob2 = Index([np.timedelta64(td) for td in oob], dtype=object)
        assert (oob == oob2).all()
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

        oob3 = oob.map(np.timedelta64)
        assert (oob3 == oob).all()
        assert not idx.equals(oob3)
        assert not idx2.equals(oob3)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pandas import (
|
| 6 |
+
DatetimeIndex,
|
| 7 |
+
Index,
|
| 8 |
+
)
|
| 9 |
+
import pandas._testing as tm
|
| 10 |
+
|
| 11 |
+
# The datetime-like dtypes that are paired against each other below.
dtlike_dtypes = [
    np.dtype("timedelta64[ns]"),
    np.dtype("datetime64[ns]"),
    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
    pd.PeriodDtype("ns"),
]


@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
    """``get_indexer_non_unique`` only matches when both dtypes are the same.

    Both indexes are built from the same six int64 values (three distinct
    values, tiled twice).
    """
    vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)

    def construct(dtype):
        if dtype is not dtlike_dtypes[-1]:
            return Index(vals, dtype=dtype)
        # PeriodArray will try to cast ints to strings
        return DatetimeIndex(vals).astype(dtype)

    left = construct(ldtype)
    right = construct(rdtype)

    indexer, missing = left.get_indexer_non_unique(right)

    if ldtype is rdtype:
        expected_indexer = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
        expected_missing = np.array([], dtype=np.intp)
    else:
        expected_indexer = np.array([-1] * 6, dtype=np.intp)
        expected_missing = np.arange(6, dtype=np.intp)

    tm.assert_numpy_array_equal(indexer, expected_indexer)
    tm.assert_numpy_array_equal(missing, expected_missing)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pandas import (
|
| 2 |
+
Index,
|
| 3 |
+
NaT,
|
| 4 |
+
date_range,
|
| 5 |
+
)
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def test_is_monotonic_with_nat():
    """Monotonicity/uniqueness of datetime-like indexes with and without NaT.

    GH#31437: PeriodIndex.is_monotonic_increasing should behave analogously
    to DatetimeIndex, in particular never be monotonic when we have NaT.
    Each check runs against both the index objects and their ``_engine``.
    """
    dti = date_range("2016-01-01", periods=3)
    pi = dti.to_period("D")
    tdi = Index(dti.view("timedelta64[ns]"))

    # No NaT: strictly increasing and unique.  The same assertion applies
    # to indexes and engines, so no isinstance guard is needed.
    for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
        assert obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT inserted at the front
    dti1 = dti.insert(0, NaT)
    pi1 = dti1.to_period("D")
    tdi1 = Index(dti1.view("timedelta64[ns]"))

    for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT inserted at the end
    dti2 = dti.insert(3, NaT)
    pi2 = dti2.to_period("h")
    tdi2 = Index(dti2.view("timedelta64[ns]"))

    for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
DatetimeIndex,
|
| 6 |
+
NaT,
|
| 7 |
+
PeriodIndex,
|
| 8 |
+
TimedeltaIndex,
|
| 9 |
+
)
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class NATests:
    """NaT handling checks; subclasses provide ``index_without_na``."""

    def test_nat(self, index_without_na):
        """``_na_value`` is NaT and ``_isnan``/``hasnans`` track a set NaT."""
        empty_index = index_without_na[:0]

        # deep copy so writing NaT does not touch the fixture's data
        index_with_na = index_without_na.copy(deep=True)
        index_with_na._data[1] = NaT

        for index in (empty_index, index_with_na, index_without_na):
            assert index._na_value is NaT

        cases = [
            (index_without_na, [False, False], False),
            (index_with_na, [False, True], True),
        ]
        for idx, isnan_mask, has_nans in cases:
            assert idx._can_hold_na

            tm.assert_numpy_array_equal(idx._isnan, np.array(isnan_mask))
            assert idx.hasnans is has_nans
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class TestDatetimeIndexNA(NATests):
    """NaT checks for DatetimeIndex across the ``tz_naive_fixture`` zones."""

    @pytest.fixture
    def index_without_na(self, tz_naive_fixture):
        """Two-element DatetimeIndex in the fixture's timezone."""
        return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz_naive_fixture)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class TestTimedeltaIndexNA(NATests):
    """NaT checks for TimedeltaIndex."""

    @pytest.fixture
    def index_without_na(self):
        """Two-element TimedeltaIndex."""
        values = ["1 days", "2 days"]
        return TimedeltaIndex(values)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class TestPeriodIndexNA(NATests):
    """NaT checks for PeriodIndex."""

    @pytest.fixture
    def index_without_na(self):
        """Two-element daily PeriodIndex."""
        values = ["2011-01-01", "2011-01-02"]
        return PeriodIndex(values, freq="D")
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
DatetimeIndex,
|
| 6 |
+
Index,
|
| 7 |
+
NaT,
|
| 8 |
+
PeriodIndex,
|
| 9 |
+
TimedeltaIndex,
|
| 10 |
+
timedelta_range,
|
| 11 |
+
)
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def check_freq_ascending(ordered, orig, ascending):
    """
    Assert the freq expected after sorting a range-like index.

    Applies when the original PeriodIndex/DatetimeIndex/TimedeltaIndex is
    generated (or generate-able) with
    period_range/date_range/timedelta_range.  A PeriodIndex keeps its freq;
    for DatetimeIndex/TimedeltaIndex the freq multiple ``n`` is preserved
    when ascending and negated when descending.  Other types are not
    checked.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
        return

    if not isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        return

    sign = 1 if ascending else -1
    assert ordered.freq.n == sign * orig.freq.n
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def check_freq_nonmonotonic(ordered, orig):
    """
    Assert the freq expected after sorting a non-range-like index.

    Applies when the original PeriodIndex/DatetimeIndex/TimedeltaIndex is
    _not_ generated (or generate-able) with
    period_range/date_range/timedelta_range.  A PeriodIndex keeps its freq;
    a DatetimeIndex/TimedeltaIndex must have no freq.  Other types are not
    checked.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
        return

    if isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        assert ordered.freq is None
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class TestSortValues:
|
| 43 |
+
@pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
def non_monotonic_idx(self, request):
    """Three-element, non-monotonic index of the requested class."""
    klass = request.param
    if klass is DatetimeIndex or klass is PeriodIndex:
        dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
        # the PeriodIndex variant is derived from the same datetimes
        return dti if klass is DatetimeIndex else dti.to_period("D")
    return TimedeltaIndex(
        ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
    )
|
| 54 |
+
|
| 55 |
+
def test_argmin_argmax(self, non_monotonic_idx):
|
| 56 |
+
assert non_monotonic_idx.argmin() == 1
|
| 57 |
+
assert non_monotonic_idx.argmax() == 0
|
| 58 |
+
|
| 59 |
+
def test_sort_values(self, non_monotonic_idx):
|
| 60 |
+
idx = non_monotonic_idx
|
| 61 |
+
ordered = idx.sort_values()
|
| 62 |
+
assert ordered.is_monotonic_increasing
|
| 63 |
+
ordered = idx.sort_values(ascending=False)
|
| 64 |
+
assert ordered[::-1].is_monotonic_increasing
|
| 65 |
+
|
| 66 |
+
ordered, dexer = idx.sort_values(return_indexer=True)
|
| 67 |
+
assert ordered.is_monotonic_increasing
|
| 68 |
+
tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))
|
| 69 |
+
|
| 70 |
+
ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
|
| 71 |
+
assert ordered[::-1].is_monotonic_increasing
|
| 72 |
+
tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))
|
| 73 |
+
|
| 74 |
+
def check_sort_values_with_freq(self, idx):
    """Check ``sort_values`` on an already-sorted, three-element index.

    Verifies values, the freq (via ``check_freq_ascending``) and, when
    requested, the returned indexer, for both sort directions.
    """
    reversed_idx = idx[::-1]

    result = idx.sort_values()
    tm.assert_index_equal(result, idx)
    check_freq_ascending(result, idx, True)

    result = idx.sort_values(ascending=False)
    tm.assert_index_equal(result, reversed_idx)
    check_freq_ascending(result, idx, False)

    result, indexer = idx.sort_values(return_indexer=True)
    tm.assert_index_equal(result, idx)
    tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp))
    check_freq_ascending(result, idx, True)

    result, indexer = idx.sort_values(return_indexer=True, ascending=False)
    tm.assert_index_equal(result, reversed_idx)
    tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp))
    check_freq_ascending(result, idx, False)
|
| 94 |
+
|
| 95 |
+
@pytest.mark.parametrize("freq", ["D", "h"])
def test_sort_values_with_freq_timedeltaindex(self, freq):
    """Run the with-freq checks on a ``timedelta_range`` index (GH#10295)."""
    tdi = timedelta_range(start=f"1{freq}", periods=3, freq=freq)
    idx = tdi.rename("idx")

    self.check_sort_values_with_freq(idx)
|
| 101 |
+
|
| 102 |
+
@pytest.mark.parametrize(
    "idx",
    [
        DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
        ),
        DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
            freq="h",
            name="tzidx",
            tz="Asia/Tokyo",
        ),
    ],
)
def test_sort_values_with_freq_datetimeindex(self, idx):
    """Run the with-freq checks on naive and tz-aware DatetimeIndexes."""
    self.check_sort_values_with_freq(idx)
|
| 118 |
+
|
| 119 |
+
@pytest.mark.parametrize("freq", ["D", "2D", "4D"])
def test_sort_values_with_freq_periodindex(self, freq):
    """Run the with-freq checks on a PeriodIndex of consecutive days."""
    # here with_freq refers to being period_range-like
    days = ["2011-01-01", "2011-01-02", "2011-01-03"]
    idx = PeriodIndex(days, freq=freq, name="idx")
    self.check_sort_values_with_freq(idx)
|
| 126 |
+
|
| 127 |
+
@pytest.mark.parametrize(
    "idx",
    [
        PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"),
        Index([2011, 2012, 2013], name="idx"),  # for compatibility check
    ],
)
def test_sort_values_with_freq_periodindex2(self, idx):
    """Run the with-freq checks on a yearly PeriodIndex and an int Index."""
    # here with_freq indicates this is period_range-like
    self.check_sort_values_with_freq(idx)
|
| 137 |
+
|
| 138 |
+
def check_sort_values_without_freq(self, idx, expected):
    """
    Check ``sort_values`` on a five-element, non-monotonic ``idx``.

    Parameters
    ----------
    idx : Index
        Unsorted input. The hard-coded indexer arrays below require its
        ascending order (missing values first) to be positions
        ``[0, 4, 3, 1, 2]`` and its descending order ``[2, 1, 3, 0, 4]``.
    expected : Index
        ``idx`` in ascending order with missing values first.

    Every sorted result is also passed, together with ``idx``, to
    ``check_freq_nonmonotonic``.
    """
    no_missing = not idx.isna().any()
    descending = expected[::-1]
    asc_positions = np.array([0, 4, 3, 1, 2], dtype=np.intp)
    desc_positions = np.array([2, 1, 3, 0, 4], dtype=np.intp)

    # --- values only -----------------------------------------------------
    result = idx.sort_values(na_position="first")
    tm.assert_index_equal(result, expected)
    check_freq_nonmonotonic(result, idx)

    if no_missing:
        # default na_position only matches ``expected`` when nothing is missing
        result = idx.sort_values()
        tm.assert_index_equal(result, expected)
        check_freq_nonmonotonic(result, idx)

    result = idx.sort_values(ascending=False)
    tm.assert_index_equal(result, descending)
    check_freq_nonmonotonic(result, idx)

    # --- values plus indexer ----------------------------------------------
    result, positions = idx.sort_values(return_indexer=True, na_position="first")
    tm.assert_index_equal(result, expected)
    tm.assert_numpy_array_equal(positions, asc_positions)
    check_freq_nonmonotonic(result, idx)

    if no_missing:
        result, positions = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(result, expected)
        tm.assert_numpy_array_equal(positions, asc_positions)
        check_freq_nonmonotonic(result, idx)

    result, positions = idx.sort_values(return_indexer=True, ascending=False)
    tm.assert_index_equal(result, descending)
    tm.assert_numpy_array_equal(positions, desc_positions)
    check_freq_nonmonotonic(result, idx)
|
| 173 |
+
|
| 174 |
+
def test_sort_values_without_freq_timedeltaindex(self):
|
| 175 |
+
# GH#10295
|
| 176 |
+
|
| 177 |
+
idx = TimedeltaIndex(
|
| 178 |
+
["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
|
| 179 |
+
)
|
| 180 |
+
expected = TimedeltaIndex(
|
| 181 |
+
["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
|
| 182 |
+
)
|
| 183 |
+
self.check_sort_values_without_freq(idx, expected)
|
| 184 |
+
|
| 185 |
+
@pytest.mark.parametrize(
|
| 186 |
+
"index_dates,expected_dates",
|
| 187 |
+
[
|
| 188 |
+
(
|
| 189 |
+
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
|
| 190 |
+
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
|
| 191 |
+
),
|
| 192 |
+
(
|
| 193 |
+
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
|
| 194 |
+
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
|
| 195 |
+
),
|
| 196 |
+
(
|
| 197 |
+
[NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
|
| 198 |
+
[NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
|
| 199 |
+
),
|
| 200 |
+
],
|
| 201 |
+
)
|
| 202 |
+
def test_sort_values_without_freq_datetimeindex(
|
| 203 |
+
self, index_dates, expected_dates, tz_naive_fixture
|
| 204 |
+
):
|
| 205 |
+
tz = tz_naive_fixture
|
| 206 |
+
|
| 207 |
+
# without freq
|
| 208 |
+
idx = DatetimeIndex(index_dates, tz=tz, name="idx")
|
| 209 |
+
expected = DatetimeIndex(expected_dates, tz=tz, name="idx")
|
| 210 |
+
|
| 211 |
+
self.check_sort_values_without_freq(idx, expected)
|
| 212 |
+
|
| 213 |
+
@pytest.mark.parametrize(
|
| 214 |
+
"idx,expected",
|
| 215 |
+
[
|
| 216 |
+
(
|
| 217 |
+
PeriodIndex(
|
| 218 |
+
[
|
| 219 |
+
"2011-01-01",
|
| 220 |
+
"2011-01-03",
|
| 221 |
+
"2011-01-05",
|
| 222 |
+
"2011-01-02",
|
| 223 |
+
"2011-01-01",
|
| 224 |
+
],
|
| 225 |
+
freq="D",
|
| 226 |
+
name="idx1",
|
| 227 |
+
),
|
| 228 |
+
PeriodIndex(
|
| 229 |
+
[
|
| 230 |
+
"2011-01-01",
|
| 231 |
+
"2011-01-01",
|
| 232 |
+
"2011-01-02",
|
| 233 |
+
"2011-01-03",
|
| 234 |
+
"2011-01-05",
|
| 235 |
+
],
|
| 236 |
+
freq="D",
|
| 237 |
+
name="idx1",
|
| 238 |
+
),
|
| 239 |
+
),
|
| 240 |
+
(
|
| 241 |
+
PeriodIndex(
|
| 242 |
+
[
|
| 243 |
+
"2011-01-01",
|
| 244 |
+
"2011-01-03",
|
| 245 |
+
"2011-01-05",
|
| 246 |
+
"2011-01-02",
|
| 247 |
+
"2011-01-01",
|
| 248 |
+
],
|
| 249 |
+
freq="D",
|
| 250 |
+
name="idx2",
|
| 251 |
+
),
|
| 252 |
+
PeriodIndex(
|
| 253 |
+
[
|
| 254 |
+
"2011-01-01",
|
| 255 |
+
"2011-01-01",
|
| 256 |
+
"2011-01-02",
|
| 257 |
+
"2011-01-03",
|
| 258 |
+
"2011-01-05",
|
| 259 |
+
],
|
| 260 |
+
freq="D",
|
| 261 |
+
name="idx2",
|
| 262 |
+
),
|
| 263 |
+
),
|
| 264 |
+
(
|
| 265 |
+
PeriodIndex(
|
| 266 |
+
[NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
|
| 267 |
+
freq="D",
|
| 268 |
+
name="idx3",
|
| 269 |
+
),
|
| 270 |
+
PeriodIndex(
|
| 271 |
+
[NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
|
| 272 |
+
freq="D",
|
| 273 |
+
name="idx3",
|
| 274 |
+
),
|
| 275 |
+
),
|
| 276 |
+
(
|
| 277 |
+
PeriodIndex(
|
| 278 |
+
["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
|
| 279 |
+
),
|
| 280 |
+
PeriodIndex(
|
| 281 |
+
["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y"
|
| 282 |
+
),
|
| 283 |
+
),
|
| 284 |
+
(
|
| 285 |
+
# For compatibility check
|
| 286 |
+
Index([2011, 2013, 2015, 2012, 2011], name="idx"),
|
| 287 |
+
Index([2011, 2011, 2012, 2013, 2015], name="idx"),
|
| 288 |
+
),
|
| 289 |
+
],
|
| 290 |
+
)
|
| 291 |
+
def test_sort_values_without_freq_periodindex(self, idx, expected):
|
| 292 |
+
# here without_freq means not generateable by period_range
|
| 293 |
+
self.check_sort_values_without_freq(idx, expected)
|
| 294 |
+
|
| 295 |
+
def test_sort_values_without_freq_periodindex_nat(self):
    """
    Sort a four-element daily PeriodIndex containing one NaT, both ascending
    with NaT first and descending, checking each result against the expected
    order and passing it to ``check_freq_nonmonotonic``.
    """
    # doesn't quite fit into check_sort_values_without_freq
    unsorted = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
    nat_first = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")

    result = unsorted.sort_values(na_position="first")
    tm.assert_index_equal(result, nat_first)
    check_freq_nonmonotonic(result, unsorted)

    # descending order is the exact reverse, which leaves NaT at the end
    result = unsorted.sort_values(ascending=False)
    tm.assert_index_equal(result, nat_first[::-1])
    check_freq_nonmonotonic(result, unsorted)
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def test_order_stability_compat():
    """
    Descending ``sort_values`` must return the same indexer for an annual
    PeriodIndex and for an integer Index holding the same years, including
    the duplicated 2011 entry.
    """
    # GH#35922. sort_values is stable both for normal and datetime-like Index
    period_years = PeriodIndex(
        ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
    )
    int_years = Index([2011, 2013, 2015, 2012, 2011], name="idx")

    _, period_indexer = period_years.sort_values(return_indexer=True, ascending=False)
    _, int_indexer = int_years.sort_values(return_indexer=True, ascending=False)

    tm.assert_numpy_array_equal(period_indexer, int_indexer)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
DatetimeIndex,
|
| 5 |
+
NaT,
|
| 6 |
+
PeriodIndex,
|
| 7 |
+
Series,
|
| 8 |
+
TimedeltaIndex,
|
| 9 |
+
date_range,
|
| 10 |
+
period_range,
|
| 11 |
+
timedelta_range,
|
| 12 |
+
)
|
| 13 |
+
import pandas._testing as tm
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TestValueCounts:
|
| 17 |
+
# GH#7735
|
| 18 |
+
|
| 19 |
+
def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
|
| 20 |
+
tz = tz_naive_fixture
|
| 21 |
+
orig = date_range("2011-01-01 09:00", freq="h", periods=10, tz=tz)
|
| 22 |
+
self._check_value_counts_with_repeats(orig)
|
| 23 |
+
|
| 24 |
+
def test_value_counts_unique_timedeltaindex(self):
|
| 25 |
+
orig = timedelta_range("1 days 09:00:00", freq="h", periods=10)
|
| 26 |
+
self._check_value_counts_with_repeats(orig)
|
| 27 |
+
|
| 28 |
+
def test_value_counts_unique_periodindex(self):
|
| 29 |
+
orig = period_range("2011-01-01 09:00", freq="h", periods=10)
|
| 30 |
+
self._check_value_counts_with_repeats(orig)
|
| 31 |
+
|
| 32 |
+
def _check_value_counts_with_repeats(self, orig):
|
| 33 |
+
# create repeated values, 'n'th element is repeated by n+1 times
|
| 34 |
+
idx = type(orig)(
|
| 35 |
+
np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
exp_idx = orig[::-1]
|
| 39 |
+
if not isinstance(exp_idx, PeriodIndex):
|
| 40 |
+
exp_idx = exp_idx._with_freq(None)
|
| 41 |
+
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64", name="count")
|
| 42 |
+
|
| 43 |
+
for obj in [idx, Series(idx)]:
|
| 44 |
+
tm.assert_series_equal(obj.value_counts(), expected)
|
| 45 |
+
|
| 46 |
+
tm.assert_index_equal(idx.unique(), orig)
|
| 47 |
+
|
| 48 |
+
def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
|
| 49 |
+
tz = tz_naive_fixture
|
| 50 |
+
idx = DatetimeIndex(
|
| 51 |
+
[
|
| 52 |
+
"2013-01-01 09:00",
|
| 53 |
+
"2013-01-01 09:00",
|
| 54 |
+
"2013-01-01 09:00",
|
| 55 |
+
"2013-01-01 08:00",
|
| 56 |
+
"2013-01-01 08:00",
|
| 57 |
+
NaT,
|
| 58 |
+
],
|
| 59 |
+
tz=tz,
|
| 60 |
+
)
|
| 61 |
+
self._check_value_counts_dropna(idx)
|
| 62 |
+
|
| 63 |
+
def test_value_counts_unique_timedeltaindex2(self):
|
| 64 |
+
idx = TimedeltaIndex(
|
| 65 |
+
[
|
| 66 |
+
"1 days 09:00:00",
|
| 67 |
+
"1 days 09:00:00",
|
| 68 |
+
"1 days 09:00:00",
|
| 69 |
+
"1 days 08:00:00",
|
| 70 |
+
"1 days 08:00:00",
|
| 71 |
+
NaT,
|
| 72 |
+
]
|
| 73 |
+
)
|
| 74 |
+
self._check_value_counts_dropna(idx)
|
| 75 |
+
|
| 76 |
+
def test_value_counts_unique_periodindex2(self):
|
| 77 |
+
idx = PeriodIndex(
|
| 78 |
+
[
|
| 79 |
+
"2013-01-01 09:00",
|
| 80 |
+
"2013-01-01 09:00",
|
| 81 |
+
"2013-01-01 09:00",
|
| 82 |
+
"2013-01-01 08:00",
|
| 83 |
+
"2013-01-01 08:00",
|
| 84 |
+
NaT,
|
| 85 |
+
],
|
| 86 |
+
freq="h",
|
| 87 |
+
)
|
| 88 |
+
self._check_value_counts_dropna(idx)
|
| 89 |
+
|
| 90 |
+
def _check_value_counts_dropna(self, idx):
|
| 91 |
+
exp_idx = idx[[2, 3]]
|
| 92 |
+
expected = Series([3, 2], index=exp_idx, name="count")
|
| 93 |
+
|
| 94 |
+
for obj in [idx, Series(idx)]:
|
| 95 |
+
tm.assert_series_equal(obj.value_counts(), expected)
|
| 96 |
+
|
| 97 |
+
exp_idx = idx[[2, 3, -1]]
|
| 98 |
+
expected = Series([3, 2, 1], index=exp_idx, name="count")
|
| 99 |
+
|
| 100 |
+
for obj in [idx, Series(idx)]:
|
| 101 |
+
tm.assert_series_equal(obj.value_counts(dropna=False), expected)
|
| 102 |
+
|
| 103 |
+
tm.assert_index_equal(idx.unique(), exp_idx)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_astype.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas.core.dtypes.dtypes import (
|
| 7 |
+
CategoricalDtype,
|
| 8 |
+
IntervalDtype,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
from pandas import (
|
| 12 |
+
CategoricalIndex,
|
| 13 |
+
Index,
|
| 14 |
+
IntervalIndex,
|
| 15 |
+
NaT,
|
| 16 |
+
Timedelta,
|
| 17 |
+
Timestamp,
|
| 18 |
+
interval_range,
|
| 19 |
+
)
|
| 20 |
+
import pandas._testing as tm
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class AstypeTests:
    """Tests common to IntervalIndex with any subtype"""

    def test_astype_idempotent(self, index):
        """Casting to "interval" or to the index's own dtype returns an equal index."""
        for target in ("interval", index.dtype):
            tm.assert_index_equal(index.astype(target), index)

    def test_astype_object(self, index):
        """Casting to object yields an object Index that no longer ``equals`` the original."""
        as_object = index.astype(object)
        tm.assert_index_equal(as_object, Index(index.values, dtype="object"))
        assert not as_object.equals(index)

    def test_astype_category(self, index):
        """Casting to category matches ``CategoricalIndex`` built from the values."""
        as_category = index.astype("category")
        default_expected = CategoricalIndex(index.values)
        tm.assert_index_equal(as_category, default_expected)

        as_category = index.astype(CategoricalDtype())
        tm.assert_index_equal(as_category, default_expected)

        # non-default params
        kept = index.dropna().unique().values[:-1]
        ordered_dtype = CategoricalDtype(categories=kept, ordered=True)
        as_category = index.astype(ordered_dtype)
        tm.assert_index_equal(
            as_category,
            CategoricalIndex(index.values, categories=kept, ordered=True),
        )

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        """Casting to any of the parametrized non-interval dtypes raises TypeError."""
        with pytest.raises(TypeError, match="Cannot cast IntervalIndex to dtype"):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        """An unknown dtype string raises TypeError."""
        pattern = "data type [\"']fake_dtype[\"'] not understood"
        with pytest.raises(TypeError, match=pattern):
            index.astype("fake_dtype")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class TestIntSubtype(AstypeTests):
    """Tests specific to IntervalIndex with integer-like subtype"""

    # one signed and one unsigned integer index feed the ``index`` fixture
    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Yield each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        """Integer intervals convert to the subtype by casting both sides."""
        target = IntervalDtype(subtype, index.closed)
        converted = index.astype(target)

        left = index.left.astype(subtype)
        right = index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(left, right, closed=index.closed)
        tm.assert_index_equal(converted, expected)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        """Non-negative integer intervals convert between int64 and uint64."""
        source = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        target = IntervalDtype(subtype_end, source.closed)
        converted = source.astype(target)

        left = source.left.astype(subtype_end)
        right = source.right.astype(subtype_end)
        expected = IntervalIndex.from_arrays(left, right, closed=source.closed)
        tm.assert_index_equal(converted, expected)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        """int64 -> uint64 with negative values should raise a sensible ValueError."""
        # int64 -> uint64 fails with negative values
        negative_index = interval_range(-10, 10)
        target = IntervalDtype("uint64", "right")

        # Until we decide what the exception message _should_ be, we
        # assert something that it should _not_ be.
        # We should _not_ be getting a message suggesting that the -10
        # has been wrapped around to a large-positive integer
        not_wraparound = "^(?!(left side of interval must be <= right side))"
        with pytest.raises(ValueError, match=not_wraparound):
            negative_index.astype(target)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
class TestFloatSubtype(AstypeTests):
    """Tests specific to IntervalIndex with float subtype"""

    # the second index contains a missing interval and a duplicated one
    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Yield each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        """Whole-number float intervals convert to integer; NaN entries raise."""
        float_index = interval_range(0.0, 10.0)
        target = IntervalDtype(subtype, "right")
        converted = float_index.astype(target)

        left = float_index.left.astype(subtype)
        right = float_index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(left, right, closed=float_index.closed)
        tm.assert_index_equal(converted, expected)

        # raises with NA
        non_finite = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=non_finite):
            float_index.insert(0, np.nan).astype(target)

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer_with_non_integer_borders(self, subtype):
        """Fractional borders convert the same way as casting each side directly."""
        quarter_steps = interval_range(0.0, 3.0, freq=0.25)
        target = IntervalDtype(subtype, "right")
        converted = quarter_steps.astype(target)

        left = quarter_steps.left.astype(subtype)
        right = quarter_steps.right.astype(subtype)
        expected = IntervalIndex.from_arrays(left, right, closed=quarter_steps.closed)
        tm.assert_index_equal(converted, expected)

    def test_subtype_integer_errors(self):
        """float64 -> uint64 with negative values raises TypeError."""
        # float64 -> uint64 fails with negative values
        negative_index = interval_range(-10.0, 10.0)
        target = IntervalDtype("uint64", "right")
        incompatible = re.escape(
            "Cannot convert interval[float64, right] to interval[uint64, right]; "
            "subtypes are incompatible"
        )
        with pytest.raises(TypeError, match=incompatible):
            negative_index.astype(target)

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        """Float intervals cannot be cast to datetime-like subtypes."""
        target = IntervalDtype(subtype, "right")
        incompatible = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=incompatible):
            index.astype(target)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_astype_category(self, index):
        """Re-run the inherited category test with the cast RuntimeWarning ignored."""
        super().test_astype_category(index)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
class TestDatetimelikeSubtype(AstypeTests):
    """Tests specific to IntervalIndex with datetime-like subtype"""

    # naive / NaT-containing / tz-aware datetimes, then timedeltas
    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Yield each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        """int64 conversion casts both sides; uint64 conversion raises TypeError."""
        target = IntervalDtype(subtype, "right")

        if subtype == "int64":
            converted = index.astype(target)
            left = index.left.astype(subtype)
            right = index.right.astype(subtype)

            expected = IntervalIndex.from_arrays(left, right, closed=index.closed)
            tm.assert_index_equal(converted, expected)
            return

        # any other integer subtype (here uint64) is rejected
        rejected = (
            r"Cannot convert interval\[(timedelta64|datetime64)\[ns.*\], .*\] "
            r"to interval\[uint64, .*\]"
        )
        with pytest.raises(TypeError, match=rejected):
            index.astype(target)

    def test_subtype_float(self, index):
        """Datetime-like intervals cannot be cast to a float subtype."""
        target = IntervalDtype("float64", "right")
        incompatible = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=incompatible):
            index.astype(target)

    def test_subtype_datetimelike(self):
        """Datetime and timedelta interval subtypes cannot be cast to each other."""
        incompatible = "Cannot convert .* to .*; subtypes are incompatible"

        # datetime -> timedelta raises
        to_timedelta = IntervalDtype("timedelta64[ns]", "right")

        naive = interval_range(Timestamp("2018-01-01"), periods=10)
        with pytest.raises(TypeError, match=incompatible):
            naive.astype(to_timedelta)

        aware = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10)
        with pytest.raises(TypeError, match=incompatible):
            aware.astype(to_timedelta)

        # timedelta -> datetime raises
        to_datetime = IntervalDtype("datetime64[ns]", "right")
        deltas = interval_range(Timedelta("0 days"), periods=10)
        with pytest.raises(TypeError, match=incompatible):
            deltas.astype(to_datetime)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_constructors.py
ADDED
|
@@ -0,0 +1,535 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import partial
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
import pandas.util._test_decorators as td
|
| 7 |
+
|
| 8 |
+
from pandas.core.dtypes.common import is_unsigned_integer_dtype
|
| 9 |
+
from pandas.core.dtypes.dtypes import IntervalDtype
|
| 10 |
+
|
| 11 |
+
from pandas import (
|
| 12 |
+
Categorical,
|
| 13 |
+
CategoricalDtype,
|
| 14 |
+
CategoricalIndex,
|
| 15 |
+
Index,
|
| 16 |
+
Interval,
|
| 17 |
+
IntervalIndex,
|
| 18 |
+
date_range,
|
| 19 |
+
notna,
|
| 20 |
+
period_range,
|
| 21 |
+
timedelta_range,
|
| 22 |
+
)
|
| 23 |
+
import pandas._testing as tm
|
| 24 |
+
from pandas.core.arrays import IntervalArray
|
| 25 |
+
import pandas.core.common as com
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Parametrized fixture supplying ``None`` and then "foo"."""
    current = request.param
    return current
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class ConstructorTests:
|
| 34 |
+
"""
|
| 35 |
+
Common tests for all variations of IntervalIndex construction. Input data
|
| 36 |
+
to be supplied in breaks format, then converted by the subclass method
|
| 37 |
+
get_kwargs_from_breaks to the expected format.
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
@pytest.fixture(
|
| 41 |
+
params=[
|
| 42 |
+
([3, 14, 15, 92, 653], np.int64),
|
| 43 |
+
(np.arange(10, dtype="int64"), np.int64),
|
| 44 |
+
(Index(np.arange(-10, 11, dtype=np.int64)), np.int64),
|
| 45 |
+
(Index(np.arange(10, 31, dtype=np.uint64)), np.uint64),
|
| 46 |
+
(Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
|
| 47 |
+
(date_range("20180101", periods=10), "<M8[ns]"),
|
| 48 |
+
(
|
| 49 |
+
date_range("20180101", periods=10, tz="US/Eastern"),
|
| 50 |
+
"datetime64[ns, US/Eastern]",
|
| 51 |
+
),
|
| 52 |
+
(timedelta_range("1 day", periods=10), "<m8[ns]"),
|
| 53 |
+
]
|
| 54 |
+
)
|
| 55 |
+
def breaks_and_expected_subtype(self, request):
|
| 56 |
+
return request.param
|
| 57 |
+
|
| 58 |
+
def test_constructor(self, constructor, breaks_and_expected_subtype, closed, name):
|
| 59 |
+
breaks, expected_subtype = breaks_and_expected_subtype
|
| 60 |
+
|
| 61 |
+
result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
|
| 62 |
+
|
| 63 |
+
result = constructor(closed=closed, name=name, **result_kwargs)
|
| 64 |
+
|
| 65 |
+
assert result.closed == closed
|
| 66 |
+
assert result.name == name
|
| 67 |
+
assert result.dtype.subtype == expected_subtype
|
| 68 |
+
tm.assert_index_equal(result.left, Index(breaks[:-1], dtype=expected_subtype))
|
| 69 |
+
tm.assert_index_equal(result.right, Index(breaks[1:], dtype=expected_subtype))
|
| 70 |
+
|
| 71 |
+
@pytest.mark.parametrize(
|
| 72 |
+
"breaks, subtype",
|
| 73 |
+
[
|
| 74 |
+
(Index([0, 1, 2, 3, 4], dtype=np.int64), "float64"),
|
| 75 |
+
(Index([0, 1, 2, 3, 4], dtype=np.int64), "datetime64[ns]"),
|
| 76 |
+
(Index([0, 1, 2, 3, 4], dtype=np.int64), "timedelta64[ns]"),
|
| 77 |
+
(Index([0, 1, 2, 3, 4], dtype=np.float64), "int64"),
|
| 78 |
+
(date_range("2017-01-01", periods=5), "int64"),
|
| 79 |
+
(timedelta_range("1 day", periods=5), "int64"),
|
| 80 |
+
],
|
| 81 |
+
)
|
| 82 |
+
def test_constructor_dtype(self, constructor, breaks, subtype):
|
| 83 |
+
# GH 19262: conversion via dtype parameter
|
| 84 |
+
expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
|
| 85 |
+
expected = constructor(**expected_kwargs)
|
| 86 |
+
|
| 87 |
+
result_kwargs = self.get_kwargs_from_breaks(breaks)
|
| 88 |
+
iv_dtype = IntervalDtype(subtype, "right")
|
| 89 |
+
for dtype in (iv_dtype, str(iv_dtype)):
|
| 90 |
+
result = constructor(dtype=dtype, **result_kwargs)
|
| 91 |
+
tm.assert_index_equal(result, expected)
|
| 92 |
+
|
| 93 |
+
@pytest.mark.parametrize(
|
| 94 |
+
"breaks",
|
| 95 |
+
[
|
| 96 |
+
Index([0, 1, 2, 3, 4], dtype=np.int64),
|
| 97 |
+
Index([0, 1, 2, 3, 4], dtype=np.uint64),
|
| 98 |
+
Index([0, 1, 2, 3, 4], dtype=np.float64),
|
| 99 |
+
date_range("2017-01-01", periods=5),
|
| 100 |
+
timedelta_range("1 day", periods=5),
|
| 101 |
+
],
|
| 102 |
+
)
|
| 103 |
+
def test_constructor_pass_closed(self, constructor, breaks):
|
| 104 |
+
# not passing closed to IntervalDtype, but to IntervalArray constructor
|
| 105 |
+
iv_dtype = IntervalDtype(breaks.dtype)
|
| 106 |
+
|
| 107 |
+
result_kwargs = self.get_kwargs_from_breaks(breaks)
|
| 108 |
+
|
| 109 |
+
for dtype in (iv_dtype, str(iv_dtype)):
|
| 110 |
+
with tm.assert_produces_warning(None):
|
| 111 |
+
result = constructor(dtype=dtype, closed="left", **result_kwargs)
|
| 112 |
+
assert result.dtype.closed == "left"
|
| 113 |
+
|
| 114 |
+
@pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
|
| 115 |
+
def test_constructor_nan(self, constructor, breaks, closed):
|
| 116 |
+
# GH 18421
|
| 117 |
+
result_kwargs = self.get_kwargs_from_breaks(breaks)
|
| 118 |
+
result = constructor(closed=closed, **result_kwargs)
|
| 119 |
+
|
| 120 |
+
expected_subtype = np.float64
|
| 121 |
+
expected_values = np.array(breaks[:-1], dtype=object)
|
| 122 |
+
|
| 123 |
+
assert result.closed == closed
|
| 124 |
+
assert result.dtype.subtype == expected_subtype
|
| 125 |
+
tm.assert_numpy_array_equal(np.array(result), expected_values)
|
| 126 |
+
|
| 127 |
+
@pytest.mark.parametrize(
|
| 128 |
+
"breaks",
|
| 129 |
+
[
|
| 130 |
+
[],
|
| 131 |
+
np.array([], dtype="int64"),
|
| 132 |
+
np.array([], dtype="uint64"),
|
| 133 |
+
np.array([], dtype="float64"),
|
| 134 |
+
np.array([], dtype="datetime64[ns]"),
|
| 135 |
+
np.array([], dtype="timedelta64[ns]"),
|
| 136 |
+
],
|
| 137 |
+
)
|
| 138 |
+
def test_constructor_empty(self, constructor, breaks, closed):
|
| 139 |
+
# GH 18421
|
| 140 |
+
result_kwargs = self.get_kwargs_from_breaks(breaks)
|
| 141 |
+
result = constructor(closed=closed, **result_kwargs)
|
| 142 |
+
|
| 143 |
+
expected_values = np.array([], dtype=object)
|
| 144 |
+
expected_subtype = getattr(breaks, "dtype", np.int64)
|
| 145 |
+
|
| 146 |
+
assert result.empty
|
| 147 |
+
assert result.closed == closed
|
| 148 |
+
assert result.dtype.subtype == expected_subtype
|
| 149 |
+
tm.assert_numpy_array_equal(np.array(result), expected_values)
|
| 150 |
+
|
| 151 |
+
@pytest.mark.parametrize(
|
| 152 |
+
"breaks",
|
| 153 |
+
[
|
| 154 |
+
tuple("0123456789"),
|
| 155 |
+
list("abcdefghij"),
|
| 156 |
+
np.array(list("abcdefghij"), dtype=object),
|
| 157 |
+
np.array(list("abcdefghij"), dtype="<U1"),
|
| 158 |
+
],
|
| 159 |
+
)
|
| 160 |
+
def test_constructor_string(self, constructor, breaks):
|
| 161 |
+
# GH 19016
|
| 162 |
+
msg = (
|
| 163 |
+
"category, object, and string subtypes are not supported "
|
| 164 |
+
"for IntervalIndex"
|
| 165 |
+
)
|
| 166 |
+
with pytest.raises(TypeError, match=msg):
|
| 167 |
+
constructor(**self.get_kwargs_from_breaks(breaks))
|
| 168 |
+
|
| 169 |
+
@pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
|
| 170 |
+
def test_constructor_categorical_valid(self, constructor, cat_constructor):
|
| 171 |
+
# GH 21243/21253
|
| 172 |
+
|
| 173 |
+
breaks = np.arange(10, dtype="int64")
|
| 174 |
+
expected = IntervalIndex.from_breaks(breaks)
|
| 175 |
+
|
| 176 |
+
cat_breaks = cat_constructor(breaks)
|
| 177 |
+
result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
|
| 178 |
+
result = constructor(**result_kwargs)
|
| 179 |
+
tm.assert_index_equal(result, expected)
|
| 180 |
+
|
| 181 |
+
def test_generic_errors(self, constructor):
|
| 182 |
+
# filler input data to be used when supplying invalid kwargs
|
| 183 |
+
filler = self.get_kwargs_from_breaks(range(10))
|
| 184 |
+
|
| 185 |
+
# invalid closed
|
| 186 |
+
msg = "closed must be one of 'right', 'left', 'both', 'neither'"
|
| 187 |
+
with pytest.raises(ValueError, match=msg):
|
| 188 |
+
constructor(closed="invalid", **filler)
|
| 189 |
+
|
| 190 |
+
# unsupported dtype
|
| 191 |
+
msg = "dtype must be an IntervalDtype, got int64"
|
| 192 |
+
with pytest.raises(TypeError, match=msg):
|
| 193 |
+
constructor(dtype="int64", **filler)
|
| 194 |
+
|
| 195 |
+
# invalid dtype
|
| 196 |
+
msg = "data type [\"']invalid[\"'] not understood"
|
| 197 |
+
with pytest.raises(TypeError, match=msg):
|
| 198 |
+
constructor(dtype="invalid", **filler)
|
| 199 |
+
|
| 200 |
+
# no point in nesting periods in an IntervalIndex
|
| 201 |
+
periods = period_range("2000-01-01", periods=10)
|
| 202 |
+
periods_kwargs = self.get_kwargs_from_breaks(periods)
|
| 203 |
+
msg = "Period dtypes are not supported, use a PeriodIndex instead"
|
| 204 |
+
with pytest.raises(ValueError, match=msg):
|
| 205 |
+
constructor(**periods_kwargs)
|
| 206 |
+
|
| 207 |
+
# decreasing values
|
| 208 |
+
decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
|
| 209 |
+
msg = "left side of interval must be <= right side"
|
| 210 |
+
with pytest.raises(ValueError, match=msg):
|
| 211 |
+
constructor(**decreasing_kwargs)
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
class TestFromArrays(ConstructorTests):
|
| 215 |
+
"""Tests specific to IntervalIndex.from_arrays"""
|
| 216 |
+
|
| 217 |
+
@pytest.fixture
|
| 218 |
+
def constructor(self):
|
| 219 |
+
return IntervalIndex.from_arrays
|
| 220 |
+
|
| 221 |
+
def get_kwargs_from_breaks(self, breaks, closed="right"):
|
| 222 |
+
"""
|
| 223 |
+
converts intervals in breaks format to a dictionary of kwargs to
|
| 224 |
+
specific to the format expected by IntervalIndex.from_arrays
|
| 225 |
+
"""
|
| 226 |
+
return {"left": breaks[:-1], "right": breaks[1:]}
|
| 227 |
+
|
| 228 |
+
def test_constructor_errors(self):
|
| 229 |
+
# GH 19016: categorical data
|
| 230 |
+
data = Categorical(list("01234abcde"), ordered=True)
|
| 231 |
+
msg = (
|
| 232 |
+
"category, object, and string subtypes are not supported "
|
| 233 |
+
"for IntervalIndex"
|
| 234 |
+
)
|
| 235 |
+
with pytest.raises(TypeError, match=msg):
|
| 236 |
+
IntervalIndex.from_arrays(data[:-1], data[1:])
|
| 237 |
+
|
| 238 |
+
# unequal length
|
| 239 |
+
left = [0, 1, 2]
|
| 240 |
+
right = [2, 3]
|
| 241 |
+
msg = "left and right must have the same length"
|
| 242 |
+
with pytest.raises(ValueError, match=msg):
|
| 243 |
+
IntervalIndex.from_arrays(left, right)
|
| 244 |
+
|
| 245 |
+
@pytest.mark.parametrize(
|
| 246 |
+
"left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
|
| 247 |
+
)
|
| 248 |
+
def test_mixed_float_int(self, left_subtype, right_subtype):
|
| 249 |
+
"""mixed int/float left/right results in float for both sides"""
|
| 250 |
+
left = np.arange(9, dtype=left_subtype)
|
| 251 |
+
right = np.arange(1, 10, dtype=right_subtype)
|
| 252 |
+
result = IntervalIndex.from_arrays(left, right)
|
| 253 |
+
|
| 254 |
+
expected_left = Index(left, dtype=np.float64)
|
| 255 |
+
expected_right = Index(right, dtype=np.float64)
|
| 256 |
+
expected_subtype = np.float64
|
| 257 |
+
|
| 258 |
+
tm.assert_index_equal(result.left, expected_left)
|
| 259 |
+
tm.assert_index_equal(result.right, expected_right)
|
| 260 |
+
assert result.dtype.subtype == expected_subtype
|
| 261 |
+
|
| 262 |
+
@pytest.mark.parametrize("interval_cls", [IntervalArray, IntervalIndex])
|
| 263 |
+
def test_from_arrays_mismatched_datetimelike_resos(self, interval_cls):
|
| 264 |
+
# GH#55714
|
| 265 |
+
left = date_range("2016-01-01", periods=3, unit="s")
|
| 266 |
+
right = date_range("2017-01-01", periods=3, unit="ms")
|
| 267 |
+
result = interval_cls.from_arrays(left, right)
|
| 268 |
+
expected = interval_cls.from_arrays(left.as_unit("ms"), right)
|
| 269 |
+
tm.assert_equal(result, expected)
|
| 270 |
+
|
| 271 |
+
# td64
|
| 272 |
+
left2 = left - left[0]
|
| 273 |
+
right2 = right - left[0]
|
| 274 |
+
result2 = interval_cls.from_arrays(left2, right2)
|
| 275 |
+
expected2 = interval_cls.from_arrays(left2.as_unit("ms"), right2)
|
| 276 |
+
tm.assert_equal(result2, expected2)
|
| 277 |
+
|
| 278 |
+
# dt64tz
|
| 279 |
+
left3 = left.tz_localize("UTC")
|
| 280 |
+
right3 = right.tz_localize("UTC")
|
| 281 |
+
result3 = interval_cls.from_arrays(left3, right3)
|
| 282 |
+
expected3 = interval_cls.from_arrays(left3.as_unit("ms"), right3)
|
| 283 |
+
tm.assert_equal(result3, expected3)
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
class TestFromBreaks(ConstructorTests):
|
| 287 |
+
"""Tests specific to IntervalIndex.from_breaks"""
|
| 288 |
+
|
| 289 |
+
@pytest.fixture
|
| 290 |
+
def constructor(self):
|
| 291 |
+
return IntervalIndex.from_breaks
|
| 292 |
+
|
| 293 |
+
def get_kwargs_from_breaks(self, breaks, closed="right"):
|
| 294 |
+
"""
|
| 295 |
+
converts intervals in breaks format to a dictionary of kwargs to
|
| 296 |
+
specific to the format expected by IntervalIndex.from_breaks
|
| 297 |
+
"""
|
| 298 |
+
return {"breaks": breaks}
|
| 299 |
+
|
| 300 |
+
def test_constructor_errors(self):
|
| 301 |
+
# GH 19016: categorical data
|
| 302 |
+
data = Categorical(list("01234abcde"), ordered=True)
|
| 303 |
+
msg = (
|
| 304 |
+
"category, object, and string subtypes are not supported "
|
| 305 |
+
"for IntervalIndex"
|
| 306 |
+
)
|
| 307 |
+
with pytest.raises(TypeError, match=msg):
|
| 308 |
+
IntervalIndex.from_breaks(data)
|
| 309 |
+
|
| 310 |
+
def test_length_one(self):
|
| 311 |
+
"""breaks of length one produce an empty IntervalIndex"""
|
| 312 |
+
breaks = [0]
|
| 313 |
+
result = IntervalIndex.from_breaks(breaks)
|
| 314 |
+
expected = IntervalIndex.from_breaks([])
|
| 315 |
+
tm.assert_index_equal(result, expected)
|
| 316 |
+
|
| 317 |
+
def test_left_right_dont_share_data(self):
|
| 318 |
+
# GH#36310
|
| 319 |
+
breaks = np.arange(5)
|
| 320 |
+
result = IntervalIndex.from_breaks(breaks)._data
|
| 321 |
+
assert result._left.base is None or result._left.base is not result._right.base
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
class TestFromTuples(ConstructorTests):
|
| 325 |
+
"""Tests specific to IntervalIndex.from_tuples"""
|
| 326 |
+
|
| 327 |
+
@pytest.fixture
|
| 328 |
+
def constructor(self):
|
| 329 |
+
return IntervalIndex.from_tuples
|
| 330 |
+
|
| 331 |
+
def get_kwargs_from_breaks(self, breaks, closed="right"):
|
| 332 |
+
"""
|
| 333 |
+
converts intervals in breaks format to a dictionary of kwargs to
|
| 334 |
+
specific to the format expected by IntervalIndex.from_tuples
|
| 335 |
+
"""
|
| 336 |
+
if is_unsigned_integer_dtype(breaks):
|
| 337 |
+
pytest.skip(f"{breaks.dtype} not relevant IntervalIndex.from_tuples tests")
|
| 338 |
+
|
| 339 |
+
if len(breaks) == 0:
|
| 340 |
+
return {"data": breaks}
|
| 341 |
+
|
| 342 |
+
tuples = list(zip(breaks[:-1], breaks[1:]))
|
| 343 |
+
if isinstance(breaks, (list, tuple)):
|
| 344 |
+
return {"data": tuples}
|
| 345 |
+
elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
|
| 346 |
+
return {"data": breaks._constructor(tuples)}
|
| 347 |
+
return {"data": com.asarray_tuplesafe(tuples)}
|
| 348 |
+
|
| 349 |
+
def test_constructor_errors(self):
|
| 350 |
+
# non-tuple
|
| 351 |
+
tuples = [(0, 1), 2, (3, 4)]
|
| 352 |
+
msg = "IntervalIndex.from_tuples received an invalid item, 2"
|
| 353 |
+
with pytest.raises(TypeError, match=msg.format(t=tuples)):
|
| 354 |
+
IntervalIndex.from_tuples(tuples)
|
| 355 |
+
|
| 356 |
+
# too few/many items
|
| 357 |
+
tuples = [(0, 1), (2,), (3, 4)]
|
| 358 |
+
msg = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
|
| 359 |
+
with pytest.raises(ValueError, match=msg.format(t=tuples)):
|
| 360 |
+
IntervalIndex.from_tuples(tuples)
|
| 361 |
+
|
| 362 |
+
tuples = [(0, 1), (2, 3, 4), (5, 6)]
|
| 363 |
+
with pytest.raises(ValueError, match=msg.format(t=tuples)):
|
| 364 |
+
IntervalIndex.from_tuples(tuples)
|
| 365 |
+
|
| 366 |
+
def test_na_tuples(self):
|
| 367 |
+
# tuple (NA, NA) evaluates the same as NA as an element
|
| 368 |
+
na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
|
| 369 |
+
idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
|
| 370 |
+
idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
|
| 371 |
+
tm.assert_index_equal(idx_na_tuple, idx_na_element)
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
class TestClassConstructors(ConstructorTests):
|
| 375 |
+
"""Tests specific to the IntervalIndex/Index constructors"""
|
| 376 |
+
|
| 377 |
+
@pytest.fixture(
|
| 378 |
+
params=[IntervalIndex, partial(Index, dtype="interval")],
|
| 379 |
+
ids=["IntervalIndex", "Index"],
|
| 380 |
+
)
|
| 381 |
+
def klass(self, request):
|
| 382 |
+
# We use a separate fixture here to include Index.__new__ with dtype kwarg
|
| 383 |
+
return request.param
|
| 384 |
+
|
| 385 |
+
@pytest.fixture
|
| 386 |
+
def constructor(self):
|
| 387 |
+
return IntervalIndex
|
| 388 |
+
|
| 389 |
+
def get_kwargs_from_breaks(self, breaks, closed="right"):
|
| 390 |
+
"""
|
| 391 |
+
converts intervals in breaks format to a dictionary of kwargs to
|
| 392 |
+
specific to the format expected by the IntervalIndex/Index constructors
|
| 393 |
+
"""
|
| 394 |
+
if is_unsigned_integer_dtype(breaks):
|
| 395 |
+
pytest.skip(f"{breaks.dtype} not relevant for class constructor tests")
|
| 396 |
+
|
| 397 |
+
if len(breaks) == 0:
|
| 398 |
+
return {"data": breaks}
|
| 399 |
+
|
| 400 |
+
ivs = [
|
| 401 |
+
Interval(left, right, closed) if notna(left) else left
|
| 402 |
+
for left, right in zip(breaks[:-1], breaks[1:])
|
| 403 |
+
]
|
| 404 |
+
|
| 405 |
+
if isinstance(breaks, list):
|
| 406 |
+
return {"data": ivs}
|
| 407 |
+
elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
|
| 408 |
+
return {"data": breaks._constructor(ivs)}
|
| 409 |
+
return {"data": np.array(ivs, dtype=object)}
|
| 410 |
+
|
| 411 |
+
def test_generic_errors(self, constructor):
|
| 412 |
+
"""
|
| 413 |
+
override the base class implementation since errors are handled
|
| 414 |
+
differently; checks unnecessary since caught at the Interval level
|
| 415 |
+
"""
|
| 416 |
+
|
| 417 |
+
def test_constructor_string(self):
|
| 418 |
+
# GH23013
|
| 419 |
+
# When forming the interval from breaks,
|
| 420 |
+
# the interval of strings is already forbidden.
|
| 421 |
+
pass
|
| 422 |
+
|
| 423 |
+
def test_constructor_errors(self, klass):
|
| 424 |
+
# mismatched closed within intervals with no constructor override
|
| 425 |
+
ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
|
| 426 |
+
msg = "intervals must all be closed on the same side"
|
| 427 |
+
with pytest.raises(ValueError, match=msg):
|
| 428 |
+
klass(ivs)
|
| 429 |
+
|
| 430 |
+
# scalar
|
| 431 |
+
msg = (
|
| 432 |
+
r"(IntervalIndex|Index)\(...\) must be called with a collection of "
|
| 433 |
+
"some kind, 5 was passed"
|
| 434 |
+
)
|
| 435 |
+
with pytest.raises(TypeError, match=msg):
|
| 436 |
+
klass(5)
|
| 437 |
+
|
| 438 |
+
# not an interval; dtype depends on 32bit/windows builds
|
| 439 |
+
msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
|
| 440 |
+
with pytest.raises(TypeError, match=msg):
|
| 441 |
+
klass([0, 1])
|
| 442 |
+
|
| 443 |
+
@pytest.mark.parametrize(
|
| 444 |
+
"data, closed",
|
| 445 |
+
[
|
| 446 |
+
([], "both"),
|
| 447 |
+
([np.nan, np.nan], "neither"),
|
| 448 |
+
(
|
| 449 |
+
[Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
|
| 450 |
+
"left",
|
| 451 |
+
),
|
| 452 |
+
(
|
| 453 |
+
[Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
|
| 454 |
+
"neither",
|
| 455 |
+
),
|
| 456 |
+
(IntervalIndex.from_breaks(range(5), closed="both"), "right"),
|
| 457 |
+
],
|
| 458 |
+
)
|
| 459 |
+
def test_override_inferred_closed(self, constructor, data, closed):
|
| 460 |
+
# GH 19370
|
| 461 |
+
if isinstance(data, IntervalIndex):
|
| 462 |
+
tuples = data.to_tuples()
|
| 463 |
+
else:
|
| 464 |
+
tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
|
| 465 |
+
expected = IntervalIndex.from_tuples(tuples, closed=closed)
|
| 466 |
+
result = constructor(data, closed=closed)
|
| 467 |
+
tm.assert_index_equal(result, expected)
|
| 468 |
+
|
| 469 |
+
@pytest.mark.parametrize(
|
| 470 |
+
"values_constructor", [list, np.array, IntervalIndex, IntervalArray]
|
| 471 |
+
)
|
| 472 |
+
def test_index_object_dtype(self, values_constructor):
|
| 473 |
+
# Index(intervals, dtype=object) is an Index (not an IntervalIndex)
|
| 474 |
+
intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
|
| 475 |
+
values = values_constructor(intervals)
|
| 476 |
+
result = Index(values, dtype=object)
|
| 477 |
+
|
| 478 |
+
assert type(result) is Index
|
| 479 |
+
tm.assert_numpy_array_equal(result.values, np.array(values))
|
| 480 |
+
|
| 481 |
+
def test_index_mixed_closed(self):
|
| 482 |
+
# GH27172
|
| 483 |
+
intervals = [
|
| 484 |
+
Interval(0, 1, closed="left"),
|
| 485 |
+
Interval(1, 2, closed="right"),
|
| 486 |
+
Interval(2, 3, closed="neither"),
|
| 487 |
+
Interval(3, 4, closed="both"),
|
| 488 |
+
]
|
| 489 |
+
result = Index(intervals)
|
| 490 |
+
expected = Index(intervals, dtype=object)
|
| 491 |
+
tm.assert_index_equal(result, expected)
|
| 492 |
+
|
| 493 |
+
|
| 494 |
+
@pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"])
|
| 495 |
+
def test_interval_index_subtype(timezone, inclusive_endpoints_fixture):
|
| 496 |
+
# GH#46999
|
| 497 |
+
dates = date_range("2022", periods=3, tz=timezone)
|
| 498 |
+
dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]"
|
| 499 |
+
result = IntervalIndex.from_arrays(
|
| 500 |
+
["2022-01-01", "2022-01-02"],
|
| 501 |
+
["2022-01-02", "2022-01-03"],
|
| 502 |
+
closed=inclusive_endpoints_fixture,
|
| 503 |
+
dtype=dtype,
|
| 504 |
+
)
|
| 505 |
+
expected = IntervalIndex.from_arrays(
|
| 506 |
+
dates[:-1], dates[1:], closed=inclusive_endpoints_fixture
|
| 507 |
+
)
|
| 508 |
+
tm.assert_index_equal(result, expected)
|
| 509 |
+
|
| 510 |
+
|
| 511 |
+
def test_dtype_closed_mismatch():
|
| 512 |
+
# GH#38394 closed specified in both dtype and IntervalIndex constructor
|
| 513 |
+
|
| 514 |
+
dtype = IntervalDtype(np.int64, "left")
|
| 515 |
+
|
| 516 |
+
msg = "closed keyword does not match dtype.closed"
|
| 517 |
+
with pytest.raises(ValueError, match=msg):
|
| 518 |
+
IntervalIndex([], dtype=dtype, closed="neither")
|
| 519 |
+
|
| 520 |
+
with pytest.raises(ValueError, match=msg):
|
| 521 |
+
IntervalArray([], dtype=dtype, closed="neither")
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
@pytest.mark.parametrize(
|
| 525 |
+
"dtype",
|
| 526 |
+
["Float64", pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow"))],
|
| 527 |
+
)
|
| 528 |
+
def test_ea_dtype(dtype):
|
| 529 |
+
# GH#56765
|
| 530 |
+
bins = [(0.0, 0.4), (0.4, 0.6)]
|
| 531 |
+
interval_dtype = IntervalDtype(subtype=dtype, closed="left")
|
| 532 |
+
result = IntervalIndex.from_tuples(bins, closed="left", dtype=interval_dtype)
|
| 533 |
+
assert result.dtype == interval_dtype
|
| 534 |
+
expected = IntervalIndex.from_tuples(bins, closed="left").astype(interval_dtype)
|
| 535 |
+
tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_equals.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
IntervalIndex,
|
| 5 |
+
date_range,
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TestEquals:
|
| 10 |
+
def test_equals(self, closed):
|
| 11 |
+
expected = IntervalIndex.from_breaks(np.arange(5), closed=closed)
|
| 12 |
+
assert expected.equals(expected)
|
| 13 |
+
assert expected.equals(expected.copy())
|
| 14 |
+
|
| 15 |
+
assert not expected.equals(expected.astype(object))
|
| 16 |
+
assert not expected.equals(np.array(expected))
|
| 17 |
+
assert not expected.equals(list(expected))
|
| 18 |
+
|
| 19 |
+
assert not expected.equals([1, 2])
|
| 20 |
+
assert not expected.equals(np.array([1, 2]))
|
| 21 |
+
assert not expected.equals(date_range("20130101", periods=2))
|
| 22 |
+
|
| 23 |
+
expected_name1 = IntervalIndex.from_breaks(
|
| 24 |
+
np.arange(5), closed=closed, name="foo"
|
| 25 |
+
)
|
| 26 |
+
expected_name2 = IntervalIndex.from_breaks(
|
| 27 |
+
np.arange(5), closed=closed, name="bar"
|
| 28 |
+
)
|
| 29 |
+
assert expected.equals(expected_name1)
|
| 30 |
+
assert expected_name1.equals(expected_name2)
|
| 31 |
+
|
| 32 |
+
for other_closed in {"left", "right", "both", "neither"} - {closed}:
|
| 33 |
+
expected_other_closed = IntervalIndex.from_breaks(
|
| 34 |
+
np.arange(5), closed=other_closed
|
| 35 |
+
)
|
| 36 |
+
assert not expected.equals(expected_other_closed)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_formats.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
DataFrame,
|
| 6 |
+
DatetimeIndex,
|
| 7 |
+
Index,
|
| 8 |
+
Interval,
|
| 9 |
+
IntervalIndex,
|
| 10 |
+
Series,
|
| 11 |
+
Timedelta,
|
| 12 |
+
Timestamp,
|
| 13 |
+
)
|
| 14 |
+
import pandas._testing as tm
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class TestIntervalIndexRendering:
|
| 18 |
+
# TODO: this is a test for DataFrame/Series, not IntervalIndex
|
| 19 |
+
@pytest.mark.parametrize(
|
| 20 |
+
"constructor,expected",
|
| 21 |
+
[
|
| 22 |
+
(
|
| 23 |
+
Series,
|
| 24 |
+
(
|
| 25 |
+
"(0.0, 1.0] a\n"
|
| 26 |
+
"NaN b\n"
|
| 27 |
+
"(2.0, 3.0] c\n"
|
| 28 |
+
"dtype: object"
|
| 29 |
+
),
|
| 30 |
+
),
|
| 31 |
+
(DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")),
|
| 32 |
+
],
|
| 33 |
+
)
|
| 34 |
+
def test_repr_missing(self, constructor, expected, using_infer_string, request):
|
| 35 |
+
# GH 25984
|
| 36 |
+
if using_infer_string and constructor is Series:
|
| 37 |
+
request.applymarker(pytest.mark.xfail(reason="repr different"))
|
| 38 |
+
index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
|
| 39 |
+
obj = constructor(list("abc"), index=index)
|
| 40 |
+
result = repr(obj)
|
| 41 |
+
assert result == expected
|
| 42 |
+
|
| 43 |
+
def test_repr_floats(self):
|
| 44 |
+
# GH 32553
|
| 45 |
+
|
| 46 |
+
markers = Series(
|
| 47 |
+
[1, 2],
|
| 48 |
+
index=IntervalIndex(
|
| 49 |
+
[
|
| 50 |
+
Interval(left, right)
|
| 51 |
+
for left, right in zip(
|
| 52 |
+
Index([329.973, 345.137], dtype="float64"),
|
| 53 |
+
Index([345.137, 360.191], dtype="float64"),
|
| 54 |
+
)
|
| 55 |
+
]
|
| 56 |
+
),
|
| 57 |
+
)
|
| 58 |
+
result = str(markers)
|
| 59 |
+
expected = "(329.973, 345.137] 1\n(345.137, 360.191] 2\ndtype: int64"
|
| 60 |
+
assert result == expected
|
| 61 |
+
|
| 62 |
+
@pytest.mark.filterwarnings(
|
| 63 |
+
"ignore:invalid value encountered in cast:RuntimeWarning"
|
| 64 |
+
)
|
| 65 |
+
@pytest.mark.parametrize(
|
| 66 |
+
"tuples, closed, expected_data",
|
| 67 |
+
[
|
| 68 |
+
([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
|
| 69 |
+
(
|
| 70 |
+
[(0.5, 1.0), np.nan, (2.0, 3.0)],
|
| 71 |
+
"right",
|
| 72 |
+
["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
|
| 73 |
+
),
|
| 74 |
+
(
|
| 75 |
+
[
|
| 76 |
+
(Timestamp("20180101"), Timestamp("20180102")),
|
| 77 |
+
np.nan,
|
| 78 |
+
((Timestamp("20180102"), Timestamp("20180103"))),
|
| 79 |
+
],
|
| 80 |
+
"both",
|
| 81 |
+
[
|
| 82 |
+
"[2018-01-01 00:00:00, 2018-01-02 00:00:00]",
|
| 83 |
+
"NaN",
|
| 84 |
+
"[2018-01-02 00:00:00, 2018-01-03 00:00:00]",
|
| 85 |
+
],
|
| 86 |
+
),
|
| 87 |
+
(
|
| 88 |
+
[
|
| 89 |
+
(Timedelta("0 days"), Timedelta("1 days")),
|
| 90 |
+
(Timedelta("1 days"), Timedelta("2 days")),
|
| 91 |
+
np.nan,
|
| 92 |
+
],
|
| 93 |
+
"neither",
|
| 94 |
+
[
|
| 95 |
+
"(0 days 00:00:00, 1 days 00:00:00)",
|
| 96 |
+
"(1 days 00:00:00, 2 days 00:00:00)",
|
| 97 |
+
"NaN",
|
| 98 |
+
],
|
| 99 |
+
),
|
| 100 |
+
],
|
| 101 |
+
)
|
| 102 |
+
def test_get_values_for_csv(self, tuples, closed, expected_data):
|
| 103 |
+
# GH 28210
|
| 104 |
+
index = IntervalIndex.from_tuples(tuples, closed=closed)
|
| 105 |
+
result = index._get_values_for_csv(na_rep="NaN")
|
| 106 |
+
expected = np.array(expected_data)
|
| 107 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 108 |
+
|
| 109 |
+
def test_timestamp_with_timezone(self, unit):
|
| 110 |
+
# GH 55035
|
| 111 |
+
left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]")
|
| 112 |
+
right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]")
|
| 113 |
+
index = IntervalIndex.from_arrays(left, right)
|
| 114 |
+
result = repr(index)
|
| 115 |
+
expected = (
|
| 116 |
+
"IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], "
|
| 117 |
+
f"dtype='interval[datetime64[{unit}, UTC], right]')"
|
| 118 |
+
)
|
| 119 |
+
assert result == expected
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_indexing.py
ADDED
|
@@ -0,0 +1,674 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas.errors import InvalidIndexError
|
| 7 |
+
|
| 8 |
+
from pandas import (
|
| 9 |
+
NA,
|
| 10 |
+
CategoricalIndex,
|
| 11 |
+
DatetimeIndex,
|
| 12 |
+
Index,
|
| 13 |
+
Interval,
|
| 14 |
+
IntervalIndex,
|
| 15 |
+
MultiIndex,
|
| 16 |
+
NaT,
|
| 17 |
+
Timedelta,
|
| 18 |
+
Timestamp,
|
| 19 |
+
array,
|
| 20 |
+
date_range,
|
| 21 |
+
interval_range,
|
| 22 |
+
isna,
|
| 23 |
+
period_range,
|
| 24 |
+
timedelta_range,
|
| 25 |
+
)
|
| 26 |
+
import pandas._testing as tm
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class TestGetItem:
|
| 30 |
+
def test_getitem(self, closed):
|
| 31 |
+
idx = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
|
| 32 |
+
assert idx[0] == Interval(0.0, 1.0, closed=closed)
|
| 33 |
+
assert idx[1] == Interval(1.0, 2.0, closed=closed)
|
| 34 |
+
assert isna(idx[2])
|
| 35 |
+
|
| 36 |
+
result = idx[0:1]
|
| 37 |
+
expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
|
| 38 |
+
tm.assert_index_equal(result, expected)
|
| 39 |
+
|
| 40 |
+
result = idx[0:2]
|
| 41 |
+
expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
|
| 42 |
+
tm.assert_index_equal(result, expected)
|
| 43 |
+
|
| 44 |
+
result = idx[1:3]
|
| 45 |
+
expected = IntervalIndex.from_arrays(
|
| 46 |
+
(1.0, np.nan), (2.0, np.nan), closed=closed
|
| 47 |
+
)
|
| 48 |
+
tm.assert_index_equal(result, expected)
|
| 49 |
+
|
| 50 |
+
def test_getitem_2d_deprecated(self):
|
| 51 |
+
# GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
|
| 52 |
+
idx = IntervalIndex.from_breaks(range(11), closed="right")
|
| 53 |
+
with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
|
| 54 |
+
idx[:, None]
|
| 55 |
+
with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
|
| 56 |
+
# GH#44051
|
| 57 |
+
idx[True]
|
| 58 |
+
with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
|
| 59 |
+
# GH#44051
|
| 60 |
+
idx[False]
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class TestWhere:
|
| 64 |
+
def test_where(self, listlike_box):
|
| 65 |
+
klass = listlike_box
|
| 66 |
+
|
| 67 |
+
idx = IntervalIndex.from_breaks(range(11), closed="right")
|
| 68 |
+
cond = [True] * len(idx)
|
| 69 |
+
expected = idx
|
| 70 |
+
result = expected.where(klass(cond))
|
| 71 |
+
tm.assert_index_equal(result, expected)
|
| 72 |
+
|
| 73 |
+
cond = [False] + [True] * len(idx[1:])
|
| 74 |
+
expected = IntervalIndex([np.nan] + idx[1:].tolist())
|
| 75 |
+
result = idx.where(klass(cond))
|
| 76 |
+
tm.assert_index_equal(result, expected)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class TestTake:
|
| 80 |
+
def test_take(self, closed):
|
| 81 |
+
index = IntervalIndex.from_breaks(range(11), closed=closed)
|
| 82 |
+
|
| 83 |
+
result = index.take(range(10))
|
| 84 |
+
tm.assert_index_equal(result, index)
|
| 85 |
+
|
| 86 |
+
result = index.take([0, 0, 1])
|
| 87 |
+
expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed)
|
| 88 |
+
tm.assert_index_equal(result, expected)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
class TestGetLoc:
|
| 92 |
+
@pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
|
| 93 |
+
def test_get_loc_interval(self, closed, side):
|
| 94 |
+
idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
|
| 95 |
+
|
| 96 |
+
for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
|
| 97 |
+
# if get_loc is supplied an interval, it should only search
|
| 98 |
+
# for exact matches, not overlaps or covers, else KeyError.
|
| 99 |
+
msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
|
| 100 |
+
if closed == side:
|
| 101 |
+
if bound == [0, 1]:
|
| 102 |
+
assert idx.get_loc(Interval(0, 1, closed=side)) == 0
|
| 103 |
+
elif bound == [2, 3]:
|
| 104 |
+
assert idx.get_loc(Interval(2, 3, closed=side)) == 1
|
| 105 |
+
else:
|
| 106 |
+
with pytest.raises(KeyError, match=msg):
|
| 107 |
+
idx.get_loc(Interval(*bound, closed=side))
|
| 108 |
+
else:
|
| 109 |
+
with pytest.raises(KeyError, match=msg):
|
| 110 |
+
idx.get_loc(Interval(*bound, closed=side))
|
| 111 |
+
|
| 112 |
+
@pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
|
| 113 |
+
def test_get_loc_scalar(self, closed, scalar):
|
| 114 |
+
# correct = {side: {query: answer}}.
|
| 115 |
+
# If query is not in the dict, that query should raise a KeyError
|
| 116 |
+
correct = {
|
| 117 |
+
"right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
|
| 118 |
+
"left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
|
| 119 |
+
"both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
|
| 120 |
+
"neither": {0.5: 0, 2.5: 1},
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
|
| 124 |
+
|
| 125 |
+
# if get_loc is supplied a scalar, it should return the index of
|
| 126 |
+
# the interval which contains the scalar, or KeyError.
|
| 127 |
+
if scalar in correct[closed].keys():
|
| 128 |
+
assert idx.get_loc(scalar) == correct[closed][scalar]
|
| 129 |
+
else:
|
| 130 |
+
with pytest.raises(KeyError, match=str(scalar)):
|
| 131 |
+
idx.get_loc(scalar)
|
| 132 |
+
|
| 133 |
+
@pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
|
| 134 |
+
def test_get_loc_length_one_scalar(self, scalar, closed):
|
| 135 |
+
# GH 20921
|
| 136 |
+
index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
|
| 137 |
+
if scalar in index[0]:
|
| 138 |
+
result = index.get_loc(scalar)
|
| 139 |
+
assert result == 0
|
| 140 |
+
else:
|
| 141 |
+
with pytest.raises(KeyError, match=str(scalar)):
|
| 142 |
+
index.get_loc(scalar)
|
| 143 |
+
|
| 144 |
+
@pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
|
| 145 |
+
@pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
|
| 146 |
+
def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
|
| 147 |
+
# GH 20921
|
| 148 |
+
index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
|
| 149 |
+
interval = Interval(left, right, closed=other_closed)
|
| 150 |
+
if interval == index[0]:
|
| 151 |
+
result = index.get_loc(interval)
|
| 152 |
+
assert result == 0
|
| 153 |
+
else:
|
| 154 |
+
with pytest.raises(
|
| 155 |
+
KeyError,
|
| 156 |
+
match=re.escape(f"Interval({left}, {right}, closed='{other_closed}')"),
|
| 157 |
+
):
|
| 158 |
+
index.get_loc(interval)
|
| 159 |
+
|
| 160 |
+
# Make consistent with test_interval_new.py (see #16316, #16386)
|
| 161 |
+
@pytest.mark.parametrize(
|
| 162 |
+
"breaks",
|
| 163 |
+
[
|
| 164 |
+
date_range("20180101", periods=4),
|
| 165 |
+
date_range("20180101", periods=4, tz="US/Eastern"),
|
| 166 |
+
timedelta_range("0 days", periods=4),
|
| 167 |
+
],
|
| 168 |
+
ids=lambda x: str(x.dtype),
|
| 169 |
+
)
|
| 170 |
+
def test_get_loc_datetimelike_nonoverlapping(self, breaks):
|
| 171 |
+
# GH 20636
|
| 172 |
+
# nonoverlapping = IntervalIndex method and no i8 conversion
|
| 173 |
+
index = IntervalIndex.from_breaks(breaks)
|
| 174 |
+
|
| 175 |
+
value = index[0].mid
|
| 176 |
+
result = index.get_loc(value)
|
| 177 |
+
expected = 0
|
| 178 |
+
assert result == expected
|
| 179 |
+
|
| 180 |
+
interval = Interval(index[0].left, index[0].right)
|
| 181 |
+
result = index.get_loc(interval)
|
| 182 |
+
expected = 0
|
| 183 |
+
assert result == expected
|
| 184 |
+
|
| 185 |
+
@pytest.mark.parametrize(
|
| 186 |
+
"arrays",
|
| 187 |
+
[
|
| 188 |
+
(date_range("20180101", periods=4), date_range("20180103", periods=4)),
|
| 189 |
+
(
|
| 190 |
+
date_range("20180101", periods=4, tz="US/Eastern"),
|
| 191 |
+
date_range("20180103", periods=4, tz="US/Eastern"),
|
| 192 |
+
),
|
| 193 |
+
(
|
| 194 |
+
timedelta_range("0 days", periods=4),
|
| 195 |
+
timedelta_range("2 days", periods=4),
|
| 196 |
+
),
|
| 197 |
+
],
|
| 198 |
+
ids=lambda x: str(x[0].dtype),
|
| 199 |
+
)
|
| 200 |
+
def test_get_loc_datetimelike_overlapping(self, arrays):
|
| 201 |
+
# GH 20636
|
| 202 |
+
index = IntervalIndex.from_arrays(*arrays)
|
| 203 |
+
|
| 204 |
+
value = index[0].mid + Timedelta("12 hours")
|
| 205 |
+
result = index.get_loc(value)
|
| 206 |
+
expected = slice(0, 2, None)
|
| 207 |
+
assert result == expected
|
| 208 |
+
|
| 209 |
+
interval = Interval(index[0].left, index[0].right)
|
| 210 |
+
result = index.get_loc(interval)
|
| 211 |
+
expected = 0
|
| 212 |
+
assert result == expected
|
| 213 |
+
|
| 214 |
+
@pytest.mark.parametrize(
|
| 215 |
+
"values",
|
| 216 |
+
[
|
| 217 |
+
date_range("2018-01-04", periods=4, freq="-1D"),
|
| 218 |
+
date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
|
| 219 |
+
timedelta_range("3 days", periods=4, freq="-1D"),
|
| 220 |
+
np.arange(3.0, -1.0, -1.0),
|
| 221 |
+
np.arange(3, -1, -1),
|
| 222 |
+
],
|
| 223 |
+
ids=lambda x: str(x.dtype),
|
| 224 |
+
)
|
| 225 |
+
def test_get_loc_decreasing(self, values):
|
| 226 |
+
# GH 25860
|
| 227 |
+
index = IntervalIndex.from_arrays(values[1:], values[:-1])
|
| 228 |
+
result = index.get_loc(index[0])
|
| 229 |
+
expected = 0
|
| 230 |
+
assert result == expected
|
| 231 |
+
|
| 232 |
+
@pytest.mark.parametrize("key", [[5], (2, 3)])
|
| 233 |
+
def test_get_loc_non_scalar_errors(self, key):
|
| 234 |
+
# GH 31117
|
| 235 |
+
idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])
|
| 236 |
+
|
| 237 |
+
msg = str(key)
|
| 238 |
+
with pytest.raises(InvalidIndexError, match=msg):
|
| 239 |
+
idx.get_loc(key)
|
| 240 |
+
|
| 241 |
+
def test_get_indexer_with_nans(self):
|
| 242 |
+
# GH#41831
|
| 243 |
+
index = IntervalIndex([np.nan, Interval(1, 2), np.nan])
|
| 244 |
+
|
| 245 |
+
expected = np.array([True, False, True])
|
| 246 |
+
for key in [None, np.nan, NA]:
|
| 247 |
+
assert key in index
|
| 248 |
+
result = index.get_loc(key)
|
| 249 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 250 |
+
|
| 251 |
+
for key in [NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]:
|
| 252 |
+
with pytest.raises(KeyError, match=str(key)):
|
| 253 |
+
index.get_loc(key)
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
class TestGetIndexer:
|
| 257 |
+
@pytest.mark.parametrize(
|
| 258 |
+
"query, expected",
|
| 259 |
+
[
|
| 260 |
+
([Interval(2, 4, closed="right")], [1]),
|
| 261 |
+
([Interval(2, 4, closed="left")], [-1]),
|
| 262 |
+
([Interval(2, 4, closed="both")], [-1]),
|
| 263 |
+
([Interval(2, 4, closed="neither")], [-1]),
|
| 264 |
+
([Interval(1, 4, closed="right")], [-1]),
|
| 265 |
+
([Interval(0, 4, closed="right")], [-1]),
|
| 266 |
+
([Interval(0.5, 1.5, closed="right")], [-1]),
|
| 267 |
+
([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
|
| 268 |
+
([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
|
| 269 |
+
([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
|
| 270 |
+
([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
|
| 271 |
+
],
|
| 272 |
+
)
|
| 273 |
+
def test_get_indexer_with_interval(self, query, expected):
|
| 274 |
+
tuples = [(0, 2), (2, 4), (5, 7)]
|
| 275 |
+
index = IntervalIndex.from_tuples(tuples, closed="right")
|
| 276 |
+
|
| 277 |
+
result = index.get_indexer(query)
|
| 278 |
+
expected = np.array(expected, dtype="intp")
|
| 279 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 280 |
+
|
| 281 |
+
@pytest.mark.parametrize(
|
| 282 |
+
"query, expected",
|
| 283 |
+
[
|
| 284 |
+
([-0.5], [-1]),
|
| 285 |
+
([0], [-1]),
|
| 286 |
+
([0.5], [0]),
|
| 287 |
+
([1], [0]),
|
| 288 |
+
([1.5], [1]),
|
| 289 |
+
([2], [1]),
|
| 290 |
+
([2.5], [-1]),
|
| 291 |
+
([3], [-1]),
|
| 292 |
+
([3.5], [2]),
|
| 293 |
+
([4], [2]),
|
| 294 |
+
([4.5], [-1]),
|
| 295 |
+
([1, 2], [0, 1]),
|
| 296 |
+
([1, 2, 3], [0, 1, -1]),
|
| 297 |
+
([1, 2, 3, 4], [0, 1, -1, 2]),
|
| 298 |
+
([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
|
| 299 |
+
],
|
| 300 |
+
)
|
| 301 |
+
def test_get_indexer_with_int_and_float(self, query, expected):
|
| 302 |
+
tuples = [(0, 1), (1, 2), (3, 4)]
|
| 303 |
+
index = IntervalIndex.from_tuples(tuples, closed="right")
|
| 304 |
+
|
| 305 |
+
result = index.get_indexer(query)
|
| 306 |
+
expected = np.array(expected, dtype="intp")
|
| 307 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 308 |
+
|
| 309 |
+
@pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
|
| 310 |
+
def test_get_indexer_length_one(self, item, closed):
|
| 311 |
+
# GH 17284
|
| 312 |
+
index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
|
| 313 |
+
result = index.get_indexer(item)
|
| 314 |
+
expected = np.array([0] * len(item), dtype="intp")
|
| 315 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 316 |
+
|
| 317 |
+
@pytest.mark.parametrize("size", [1, 5])
|
| 318 |
+
def test_get_indexer_length_one_interval(self, size, closed):
|
| 319 |
+
# GH 17284
|
| 320 |
+
index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
|
| 321 |
+
result = index.get_indexer([Interval(0, 5, closed)] * size)
|
| 322 |
+
expected = np.array([0] * size, dtype="intp")
|
| 323 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 324 |
+
|
| 325 |
+
@pytest.mark.parametrize(
|
| 326 |
+
"target",
|
| 327 |
+
[
|
| 328 |
+
IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
|
| 329 |
+
IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
|
| 330 |
+
IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
|
| 331 |
+
[-1, 0, 0.5, 1, 2, 2.5, np.nan],
|
| 332 |
+
["foo", "foo", "bar", "baz"],
|
| 333 |
+
],
|
| 334 |
+
)
|
| 335 |
+
def test_get_indexer_categorical(self, target, ordered):
|
| 336 |
+
# GH 30063: categorical and non-categorical results should be consistent
|
| 337 |
+
index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
|
| 338 |
+
categorical_target = CategoricalIndex(target, ordered=ordered)
|
| 339 |
+
|
| 340 |
+
result = index.get_indexer(categorical_target)
|
| 341 |
+
expected = index.get_indexer(target)
|
| 342 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 343 |
+
|
| 344 |
+
@pytest.mark.filterwarnings(
|
| 345 |
+
"ignore:invalid value encountered in cast:RuntimeWarning"
|
| 346 |
+
)
|
| 347 |
+
def test_get_indexer_categorical_with_nans(self):
|
| 348 |
+
# GH#41934 nans in both index and in target
|
| 349 |
+
ii = IntervalIndex.from_breaks(range(5))
|
| 350 |
+
ii2 = ii.append(IntervalIndex([np.nan]))
|
| 351 |
+
ci2 = CategoricalIndex(ii2)
|
| 352 |
+
|
| 353 |
+
result = ii2.get_indexer(ci2)
|
| 354 |
+
expected = np.arange(5, dtype=np.intp)
|
| 355 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 356 |
+
|
| 357 |
+
# not-all-matches
|
| 358 |
+
result = ii2[1:].get_indexer(ci2[::-1])
|
| 359 |
+
expected = np.array([3, 2, 1, 0, -1], dtype=np.intp)
|
| 360 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 361 |
+
|
| 362 |
+
# non-unique target, non-unique nans
|
| 363 |
+
result = ii2.get_indexer(ci2.append(ci2))
|
| 364 |
+
expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
|
| 365 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 366 |
+
|
| 367 |
+
def test_get_indexer_datetime(self):
|
| 368 |
+
ii = IntervalIndex.from_breaks(date_range("2018-01-01", periods=4))
|
| 369 |
+
# TODO: with mismatched resolution get_indexer currently raises;
|
| 370 |
+
# this should probably coerce?
|
| 371 |
+
target = DatetimeIndex(["2018-01-02"], dtype="M8[ns]")
|
| 372 |
+
result = ii.get_indexer(target)
|
| 373 |
+
expected = np.array([0], dtype=np.intp)
|
| 374 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 375 |
+
|
| 376 |
+
result = ii.get_indexer(target.astype(str))
|
| 377 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 378 |
+
|
| 379 |
+
# https://github.com/pandas-dev/pandas/issues/47772
|
| 380 |
+
result = ii.get_indexer(target.asi8)
|
| 381 |
+
expected = np.array([-1], dtype=np.intp)
|
| 382 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 383 |
+
|
| 384 |
+
@pytest.mark.parametrize(
|
| 385 |
+
"tuples, closed",
|
| 386 |
+
[
|
| 387 |
+
([(0, 2), (1, 3), (3, 4)], "neither"),
|
| 388 |
+
([(0, 5), (1, 4), (6, 7)], "left"),
|
| 389 |
+
([(0, 1), (0, 1), (1, 2)], "right"),
|
| 390 |
+
([(0, 1), (2, 3), (3, 4)], "both"),
|
| 391 |
+
],
|
| 392 |
+
)
|
| 393 |
+
def test_get_indexer_errors(self, tuples, closed):
|
| 394 |
+
# IntervalIndex needs non-overlapping for uniqueness when querying
|
| 395 |
+
index = IntervalIndex.from_tuples(tuples, closed=closed)
|
| 396 |
+
|
| 397 |
+
msg = (
|
| 398 |
+
"cannot handle overlapping indices; use "
|
| 399 |
+
"IntervalIndex.get_indexer_non_unique"
|
| 400 |
+
)
|
| 401 |
+
with pytest.raises(InvalidIndexError, match=msg):
|
| 402 |
+
index.get_indexer([0, 2])
|
| 403 |
+
|
| 404 |
+
@pytest.mark.parametrize(
|
| 405 |
+
"query, expected",
|
| 406 |
+
[
|
| 407 |
+
([-0.5], ([-1], [0])),
|
| 408 |
+
([0], ([0], [])),
|
| 409 |
+
([0.5], ([0], [])),
|
| 410 |
+
([1], ([0, 1], [])),
|
| 411 |
+
([1.5], ([0, 1], [])),
|
| 412 |
+
([2], ([0, 1, 2], [])),
|
| 413 |
+
([2.5], ([1, 2], [])),
|
| 414 |
+
([3], ([2], [])),
|
| 415 |
+
([3.5], ([2], [])),
|
| 416 |
+
([4], ([-1], [0])),
|
| 417 |
+
([4.5], ([-1], [0])),
|
| 418 |
+
([1, 2], ([0, 1, 0, 1, 2], [])),
|
| 419 |
+
([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
|
| 420 |
+
([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
|
| 421 |
+
([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
|
| 422 |
+
],
|
| 423 |
+
)
|
| 424 |
+
def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
|
| 425 |
+
tuples = [(0, 2.5), (1, 3), (2, 4)]
|
| 426 |
+
index = IntervalIndex.from_tuples(tuples, closed="left")
|
| 427 |
+
|
| 428 |
+
result_indexer, result_missing = index.get_indexer_non_unique(query)
|
| 429 |
+
expected_indexer = np.array(expected[0], dtype="intp")
|
| 430 |
+
expected_missing = np.array(expected[1], dtype="intp")
|
| 431 |
+
|
| 432 |
+
tm.assert_numpy_array_equal(result_indexer, expected_indexer)
|
| 433 |
+
tm.assert_numpy_array_equal(result_missing, expected_missing)
|
| 434 |
+
|
| 435 |
+
# TODO we may also want to test get_indexer for the case when
|
| 436 |
+
# the intervals are duplicated, decreasing, non-monotonic, etc..
|
| 437 |
+
|
| 438 |
+
def test_get_indexer_non_monotonic(self):
|
| 439 |
+
# GH 16410
|
| 440 |
+
idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
|
| 441 |
+
idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])
|
| 442 |
+
result = idx1.get_indexer(idx2)
|
| 443 |
+
expected = np.array([2, 0, -1, -1], dtype=np.intp)
|
| 444 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 445 |
+
|
| 446 |
+
result = idx1.get_indexer(idx1[1:])
|
| 447 |
+
expected = np.array([1, 2], dtype=np.intp)
|
| 448 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 449 |
+
|
| 450 |
+
def test_get_indexer_with_nans(self):
|
| 451 |
+
# GH#41831
|
| 452 |
+
index = IntervalIndex([np.nan, np.nan])
|
| 453 |
+
other = IntervalIndex([np.nan])
|
| 454 |
+
|
| 455 |
+
assert not index._index_as_unique
|
| 456 |
+
|
| 457 |
+
result = index.get_indexer_for(other)
|
| 458 |
+
expected = np.array([0, 1], dtype=np.intp)
|
| 459 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 460 |
+
|
| 461 |
+
def test_get_index_non_unique_non_monotonic(self):
|
| 462 |
+
# GH#44084 (root cause)
|
| 463 |
+
index = IntervalIndex.from_tuples(
|
| 464 |
+
[(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)]
|
| 465 |
+
)
|
| 466 |
+
|
| 467 |
+
result, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)])
|
| 468 |
+
expected = np.array([1, 3], dtype=np.intp)
|
| 469 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 470 |
+
|
| 471 |
+
def test_get_indexer_multiindex_with_intervals(self):
|
| 472 |
+
# GH#44084 (MultiIndex case as reported)
|
| 473 |
+
interval_index = IntervalIndex.from_tuples(
|
| 474 |
+
[(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval"
|
| 475 |
+
)
|
| 476 |
+
foo_index = Index([1, 2, 3], name="foo")
|
| 477 |
+
|
| 478 |
+
multi_index = MultiIndex.from_product([foo_index, interval_index])
|
| 479 |
+
|
| 480 |
+
result = multi_index.get_level_values("interval").get_indexer_for(
|
| 481 |
+
[Interval(0.0, 1.0)]
|
| 482 |
+
)
|
| 483 |
+
expected = np.array([1, 4, 7], dtype=np.intp)
|
| 484 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 485 |
+
|
| 486 |
+
@pytest.mark.parametrize("box", [IntervalIndex, array, list])
|
| 487 |
+
def test_get_indexer_interval_index(self, box):
|
| 488 |
+
# GH#30178
|
| 489 |
+
rng = period_range("2022-07-01", freq="D", periods=3)
|
| 490 |
+
idx = box(interval_range(Timestamp("2022-07-01"), freq="3D", periods=3))
|
| 491 |
+
|
| 492 |
+
actual = rng.get_indexer(idx)
|
| 493 |
+
expected = np.array([-1, -1, -1], dtype=np.intp)
|
| 494 |
+
tm.assert_numpy_array_equal(actual, expected)
|
| 495 |
+
|
| 496 |
+
def test_get_indexer_read_only(self):
|
| 497 |
+
idx = interval_range(start=0, end=5)
|
| 498 |
+
arr = np.array([1, 2])
|
| 499 |
+
arr.flags.writeable = False
|
| 500 |
+
result = idx.get_indexer(arr)
|
| 501 |
+
expected = np.array([0, 1])
|
| 502 |
+
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
| 503 |
+
|
| 504 |
+
result = idx.get_indexer_non_unique(arr)[0]
|
| 505 |
+
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
class TestSliceLocs:
    """``IntervalIndex.slice_locs`` with Interval and scalar bounds."""

    def test_slice_locs_with_interval(self):
        low, high = Interval(0, 2), Interval(2, 4)
        # The five keyword combinations exercised against every index below,
        # always in this order.
        queries = [
            {"start": low, "end": high},
            {"start": low},
            {"end": high},
            {"end": low},
            {"start": high, "end": low},
        ]

        def check(tuples, expected_locs):
            index = IntervalIndex.from_tuples(tuples)
            for kwargs, expected in zip(queries, expected_locs):
                assert index.slice_locs(**kwargs) == expected

        # increasing monotonically
        check([(0, 2), (1, 3), (2, 4)], [(0, 3), (0, 3), (0, 3), (0, 1), (2, 1)])

        # decreasing monotonically
        check([(2, 4), (1, 3), (0, 2)], [(2, 1), (2, 3), (0, 1), (0, 3), (0, 3)])

        # sorted duplicates
        check([(0, 2), (0, 2), (2, 4)], [(0, 3), (0, 3), (0, 3), (0, 2), (2, 2)])

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
        left_msg = re.escape(
            '"Cannot get left slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )
        right_msg = re.escape(
            '"Cannot get right slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )
        # A tuple is an expected result; a string is an expected KeyError message.
        outcomes = [left_msg, left_msg, (0, 2), right_msg, right_msg]
        for kwargs, outcome in zip(queries, outcomes):
            if isinstance(outcome, tuple):
                assert index.slice_locs(**kwargs) == outcome
            else:
                with pytest.raises(KeyError, match=outcome):
                    index.slice_locs(**kwargs)

        # another unsorted duplicates
        check(
            [(0, 2), (0, 2), (2, 4), (1, 3)],
            [(0, 3), (0, 4), (0, 3), (0, 2), (2, 2)],
        )

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        bounds = [(0, 1), (0, 2), (0, 3), (3, 1), (3, 4), (0, 4)]

        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        increasing = [(0, 1), (0, 2), (0, 2), (2, 1), (2, 3), (0, 3)]
        for (start, stop), expected in zip(bounds, increasing):
            assert index.slice_locs(start, stop) == expected

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        decreasing = [(3, 3), (3, 2), (3, 1), (1, 3), (1, 1), (3, 1)]
        for (start, stop), expected in zip(bounds, decreasing):
            assert index.slice_locs(start, stop) == expected

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)
        msg = (
            "'can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing'"
        )
        # Scalar bounds are rejected for each of these overlapping inputs.
        with pytest.raises(KeyError, match=msg):
            index.slice_locs(start, stop)
|
| 629 |
+
|
| 630 |
+
|
| 631 |
+
class TestPutmask:
    """``IntervalIndex.putmask`` on datetime-like interval indexes."""

    @pytest.mark.parametrize("tz", ["US/Pacific", None])
    def test_putmask_dt64(self, tz):
        # GH#37968
        breaks = date_range("2016-01-01", periods=9, tz=tz)
        idx = IntervalIndex.from_breaks(breaks)
        # Select the first three positions for replacement.
        first_three = np.zeros(idx.shape, dtype=bool)
        first_three[:3] = True
        last = idx[-1]

        replaced = idx.putmask(first_three, last)

        tm.assert_index_equal(replaced, IntervalIndex([last, last, last, *idx[3:]]))

    def test_putmask_td64(self):
        # GH#37968
        dti = date_range("2016-01-01", periods=9)
        # Offsets from the first timestamp give timedelta breaks.
        idx = IntervalIndex.from_breaks(dti - dti[0])
        first_three = np.zeros(idx.shape, dtype=bool)
        first_three[:3] = True
        last = idx[-1]

        replaced = idx.putmask(first_three, last)

        tm.assert_index_equal(replaced, IntervalIndex([last, last, last, *idx[3:]]))
|
| 655 |
+
|
| 656 |
+
|
| 657 |
+
class TestContains:
    # .__contains__, not .contains

    def test_contains_dunder(self):
        """``in`` only matches an Interval identical to a stored one."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        for scalar in (0, 1, 2):
            assert scalar not in index

        assert Interval(0, 1, closed="right") in index

        # Different bounds or a different closed side do not match.
        absent = [
            Interval(0, 2, closed="right"),
            Interval(0, 0.5, closed="right"),
            Interval(3, 5, closed="right"),
            Interval(-1, 0, closed="left"),
            Interval(0, 1, closed="left"),
            Interval(0, 1, closed="both"),
        ]
        for interval in absent:
            assert interval not in index
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval.py
ADDED
|
@@ -0,0 +1,918 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from itertools import permutations
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from pandas import (
|
| 9 |
+
Index,
|
| 10 |
+
Interval,
|
| 11 |
+
IntervalIndex,
|
| 12 |
+
Timedelta,
|
| 13 |
+
Timestamp,
|
| 14 |
+
date_range,
|
| 15 |
+
interval_range,
|
| 16 |
+
isna,
|
| 17 |
+
notna,
|
| 18 |
+
timedelta_range,
|
| 19 |
+
)
|
| 20 |
+
import pandas._testing as tm
|
| 21 |
+
import pandas.core.common as com
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Parametrized fixture returning ``None`` and then ``"foo"``."""
    current = request.param
    return current
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class TestIntervalIndex:
|
| 30 |
+
index = IntervalIndex.from_arrays([0, 1], [1, 2])
|
| 31 |
+
|
| 32 |
+
def create_index(self, closed="right"):
    """Build an IntervalIndex from the breaks 0..10 with the given closed side."""
    breaks = range(11)
    return IntervalIndex.from_breaks(breaks, closed=closed)
|
| 34 |
+
|
| 35 |
+
def create_index_with_nan(self, closed="right"):
    """Build a ten-element IntervalIndex whose second entry is missing."""
    # False marks the single position whose endpoints are replaced by NaN.
    keep = [True, False] + [True] * 8
    lefts = np.where(keep, np.arange(10), np.nan)
    rights = np.where(keep, np.arange(1, 11), np.nan)
    return IntervalIndex.from_arrays(lefts, rights, closed=closed)
|
| 42 |
+
|
| 43 |
+
def test_properties(self, closed):
|
| 44 |
+
index = self.create_index(closed=closed)
|
| 45 |
+
assert len(index) == 10
|
| 46 |
+
assert index.size == 10
|
| 47 |
+
assert index.shape == (10,)
|
| 48 |
+
|
| 49 |
+
tm.assert_index_equal(index.left, Index(np.arange(10, dtype=np.int64)))
|
| 50 |
+
tm.assert_index_equal(index.right, Index(np.arange(1, 11, dtype=np.int64)))
|
| 51 |
+
tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5, dtype=np.float64)))
|
| 52 |
+
|
| 53 |
+
assert index.closed == closed
|
| 54 |
+
|
| 55 |
+
ivs = [
|
| 56 |
+
Interval(left, right, closed)
|
| 57 |
+
for left, right in zip(range(10), range(1, 11))
|
| 58 |
+
]
|
| 59 |
+
expected = np.array(ivs, dtype=object)
|
| 60 |
+
tm.assert_numpy_array_equal(np.asarray(index), expected)
|
| 61 |
+
|
| 62 |
+
# with nans
|
| 63 |
+
index = self.create_index_with_nan(closed=closed)
|
| 64 |
+
assert len(index) == 10
|
| 65 |
+
assert index.size == 10
|
| 66 |
+
assert index.shape == (10,)
|
| 67 |
+
|
| 68 |
+
expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
|
| 69 |
+
expected_right = expected_left + 1
|
| 70 |
+
expected_mid = expected_left + 0.5
|
| 71 |
+
tm.assert_index_equal(index.left, expected_left)
|
| 72 |
+
tm.assert_index_equal(index.right, expected_right)
|
| 73 |
+
tm.assert_index_equal(index.mid, expected_mid)
|
| 74 |
+
|
| 75 |
+
assert index.closed == closed
|
| 76 |
+
|
| 77 |
+
ivs = [
|
| 78 |
+
Interval(left, right, closed) if notna(left) else np.nan
|
| 79 |
+
for left, right in zip(expected_left, expected_right)
|
| 80 |
+
]
|
| 81 |
+
expected = np.array(ivs, dtype=object)
|
| 82 |
+
tm.assert_numpy_array_equal(np.asarray(index), expected)
|
| 83 |
+
|
| 84 |
+
@pytest.mark.parametrize(
    "breaks",
    [
        [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
        [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
        date_range("2017-01-01", "2017-01-04"),
        pytest.param(
            date_range("2017-01-01", "2017-01-04", unit="s"),
            marks=pytest.mark.xfail(reason="mismatched result unit"),
        ),
        pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5h", "6D"]),
    ],
)
def test_length(self, closed, breaks):
    """``IntervalIndex.length`` matches the per-element ``Interval.length``."""
    # GH 18789
    intervals = IntervalIndex.from_breaks(breaks, closed=closed)
    elementwise = Index(iv.length for iv in intervals)
    tm.assert_index_equal(intervals.length, elementwise)

    # with NA: a missing element is passed through unchanged
    with_na = intervals.insert(1, np.nan)
    elementwise = Index(iv.length if notna(iv) else iv for iv in with_na)
    tm.assert_index_equal(with_na.length, elementwise)
|
| 109 |
+
|
| 110 |
+
def test_with_nans(self, closed):
    """``hasnans``, ``isna`` and ``notna`` without and with a missing entry."""
    complete = self.create_index(closed=closed)
    assert complete.hasnans is False

    count = len(complete)
    tm.assert_numpy_array_equal(complete.isna(), np.full(count, False))
    tm.assert_numpy_array_equal(complete.notna(), np.full(count, True))

    holed = self.create_index_with_nan(closed=closed)
    assert holed.hasnans is True

    # Only position 1 is expected to be missing.
    tail = len(holed) - 2
    tm.assert_numpy_array_equal(
        holed.isna(), np.array([False, True] + [False] * tail)
    )
    tm.assert_numpy_array_equal(
        holed.notna(), np.array([True, False] + [True] * tail)
    )
|
| 132 |
+
|
| 133 |
+
def test_copy(self, closed):
    """Default and deep copies compare equal to the source index."""
    source = self.create_index(closed=closed)

    shallow = source.copy()
    assert shallow.equals(source)

    deep = source.copy(deep=True)
    assert deep.equals(source)
    # The deep copy must not hand back the very same ``left`` object.
    assert deep.left is not source.left
|
| 142 |
+
|
| 143 |
+
def test_ensure_copied_data(self, closed):
    """Exercise the ``copy`` flag of the IntervalIndex constructor."""
    source = self.create_index(closed=closed)

    # not copying: endpoint buffers are expected to be shared
    shared = IntervalIndex(source, copy=False)
    for side in ("left", "right"):
        tm.assert_numpy_array_equal(
            getattr(source, side).values,
            getattr(shared, side).values,
            check_same="same",
        )

    # by-definition make a copy: the input is an ndarray built from the index
    rebuilt = IntervalIndex(np.array(source), copy=False)
    for side in ("left", "right"):
        tm.assert_numpy_array_equal(
            getattr(source, side).values,
            getattr(rebuilt, side).values,
            check_same="copy",
        )
|
| 164 |
+
|
| 165 |
+
def test_delete(self, closed):
    """Deleting position 0 leaves the intervals built from breaks 1..10."""
    remaining = IntervalIndex.from_breaks(
        np.arange(1, 11, dtype=np.int64), closed=closed
    )
    trimmed = self.create_index(closed=closed).delete(0)
    tm.assert_index_equal(trimmed, remaining)
|
| 170 |
+
|
| 171 |
+
@pytest.mark.parametrize(
    "data",
    [
        interval_range(0, periods=10, closed="neither"),
        interval_range(1.7, periods=8, freq=2.5, closed="both"),
        interval_range(Timestamp("20170101"), periods=12, closed="left"),
        interval_range(Timedelta("1 day"), periods=6, closed="right"),
    ],
)
def test_insert(self, data):
    """Check ``IntervalIndex.insert`` for valid, invalid and missing items.

    Covers inserting an existing interval at the start, end and middle,
    inserting a non-interval value, inserting an interval with a different
    ``closed``, and inserting NA-like values. For the invalid cases the
    index-level insert is compared against inserting into an object-dtype
    index, while the underlying array's ``insert`` is expected to raise.
    """
    item = data[0]
    idx_item = IntervalIndex([item])

    # start
    expected = idx_item.append(data)
    result = data.insert(0, item)
    tm.assert_index_equal(result, expected)

    # end
    expected = data.append(idx_item)
    result = data.insert(len(data), item)
    tm.assert_index_equal(result, expected)

    # mid
    expected = data[:3].append(idx_item).append(data[3:])
    result = data.insert(3, item)
    tm.assert_index_equal(result, expected)

    # invalid type
    res = data.insert(1, "foo")
    expected = data.astype(object).insert(1, "foo")
    tm.assert_index_equal(res, expected)

    msg = "can only insert Interval objects and NA into an IntervalArray"
    with pytest.raises(TypeError, match=msg):
        data._data.insert(1, "foo")

    # invalid closed: the message is rebuilt for every mismatching value
    for closed in {"left", "right", "both", "neither"} - {item.closed}:
        msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
        bad_item = Interval(item.left, item.right, closed=closed)
        res = data.insert(1, bad_item)
        expected = data.astype(object).insert(1, bad_item)
        tm.assert_index_equal(res, expected)
        with pytest.raises(ValueError, match=msg):
            data._data.insert(1, bad_item)

    # GH 18295 (test missing)
    na_idx = IntervalIndex([np.nan], closed=data.closed)
    for na in [np.nan, None, pd.NA]:
        expected = data[:1].append(na_idx).append(data[1:])
        result = data.insert(1, na)
        tm.assert_index_equal(result, expected)

    if data.left.dtype.kind not in ["m", "M"]:
        # trying to insert pd.NaT into a numeric-dtyped Index should cast
        expected = data.astype(object).insert(1, pd.NaT)

        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(TypeError, match=msg):
            data._data.insert(1, pd.NaT)

    # For datetime-like data ``expected`` is still the NA-inserted index
    # computed in the loop above.
    result = data.insert(1, pd.NaT)
    tm.assert_index_equal(result, expected)
|
| 236 |
+
|
| 237 |
+
def test_is_unique_interval(self, closed):
|
| 238 |
+
"""
|
| 239 |
+
Interval specific tests for is_unique in addition to base class tests
|
| 240 |
+
"""
|
| 241 |
+
# unique overlapping - distinct endpoints
|
| 242 |
+
idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
|
| 243 |
+
assert idx.is_unique is True
|
| 244 |
+
|
| 245 |
+
# unique overlapping - shared endpoints
|
| 246 |
+
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
|
| 247 |
+
assert idx.is_unique is True
|
| 248 |
+
|
| 249 |
+
# unique nested
|
| 250 |
+
idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
|
| 251 |
+
assert idx.is_unique is True
|
| 252 |
+
|
| 253 |
+
# unique NaN
|
| 254 |
+
idx = IntervalIndex.from_tuples([(np.nan, np.nan)], closed=closed)
|
| 255 |
+
assert idx.is_unique is True
|
| 256 |
+
|
| 257 |
+
# non-unique NaN
|
| 258 |
+
idx = IntervalIndex.from_tuples(
|
| 259 |
+
[(np.nan, np.nan), (np.nan, np.nan)], closed=closed
|
| 260 |
+
)
|
| 261 |
+
assert idx.is_unique is False
|
| 262 |
+
|
| 263 |
+
def test_monotonic(self, closed):
|
| 264 |
+
# increasing non-overlapping
|
| 265 |
+
idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
|
| 266 |
+
assert idx.is_monotonic_increasing is True
|
| 267 |
+
assert idx._is_strictly_monotonic_increasing is True
|
| 268 |
+
assert idx.is_monotonic_decreasing is False
|
| 269 |
+
assert idx._is_strictly_monotonic_decreasing is False
|
| 270 |
+
|
| 271 |
+
# decreasing non-overlapping
|
| 272 |
+
idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
|
| 273 |
+
assert idx.is_monotonic_increasing is False
|
| 274 |
+
assert idx._is_strictly_monotonic_increasing is False
|
| 275 |
+
assert idx.is_monotonic_decreasing is True
|
| 276 |
+
assert idx._is_strictly_monotonic_decreasing is True
|
| 277 |
+
|
| 278 |
+
# unordered non-overlapping
|
| 279 |
+
idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
|
| 280 |
+
assert idx.is_monotonic_increasing is False
|
| 281 |
+
assert idx._is_strictly_monotonic_increasing is False
|
| 282 |
+
assert idx.is_monotonic_decreasing is False
|
| 283 |
+
assert idx._is_strictly_monotonic_decreasing is False
|
| 284 |
+
|
| 285 |
+
# increasing overlapping
|
| 286 |
+
idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
|
| 287 |
+
assert idx.is_monotonic_increasing is True
|
| 288 |
+
assert idx._is_strictly_monotonic_increasing is True
|
| 289 |
+
assert idx.is_monotonic_decreasing is False
|
| 290 |
+
assert idx._is_strictly_monotonic_decreasing is False
|
| 291 |
+
|
| 292 |
+
# decreasing overlapping
|
| 293 |
+
idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
|
| 294 |
+
assert idx.is_monotonic_increasing is False
|
| 295 |
+
assert idx._is_strictly_monotonic_increasing is False
|
| 296 |
+
assert idx.is_monotonic_decreasing is True
|
| 297 |
+
assert idx._is_strictly_monotonic_decreasing is True
|
| 298 |
+
|
| 299 |
+
# unordered overlapping
|
| 300 |
+
idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
|
| 301 |
+
assert idx.is_monotonic_increasing is False
|
| 302 |
+
assert idx._is_strictly_monotonic_increasing is False
|
| 303 |
+
assert idx.is_monotonic_decreasing is False
|
| 304 |
+
assert idx._is_strictly_monotonic_decreasing is False
|
| 305 |
+
|
| 306 |
+
# increasing overlapping shared endpoints
|
| 307 |
+
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
|
| 308 |
+
assert idx.is_monotonic_increasing is True
|
| 309 |
+
assert idx._is_strictly_monotonic_increasing is True
|
| 310 |
+
assert idx.is_monotonic_decreasing is False
|
| 311 |
+
assert idx._is_strictly_monotonic_decreasing is False
|
| 312 |
+
|
| 313 |
+
# decreasing overlapping shared endpoints
|
| 314 |
+
idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
|
| 315 |
+
assert idx.is_monotonic_increasing is False
|
| 316 |
+
assert idx._is_strictly_monotonic_increasing is False
|
| 317 |
+
assert idx.is_monotonic_decreasing is True
|
| 318 |
+
assert idx._is_strictly_monotonic_decreasing is True
|
| 319 |
+
|
| 320 |
+
# stationary
|
| 321 |
+
idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
|
| 322 |
+
assert idx.is_monotonic_increasing is True
|
| 323 |
+
assert idx._is_strictly_monotonic_increasing is False
|
| 324 |
+
assert idx.is_monotonic_decreasing is True
|
| 325 |
+
assert idx._is_strictly_monotonic_decreasing is False
|
| 326 |
+
|
| 327 |
+
# empty
|
| 328 |
+
idx = IntervalIndex([], closed=closed)
|
| 329 |
+
assert idx.is_monotonic_increasing is True
|
| 330 |
+
assert idx._is_strictly_monotonic_increasing is True
|
| 331 |
+
assert idx.is_monotonic_decreasing is True
|
| 332 |
+
assert idx._is_strictly_monotonic_decreasing is True
|
| 333 |
+
|
| 334 |
+
def test_is_monotonic_with_nans(self):
|
| 335 |
+
# GH#41831
|
| 336 |
+
index = IntervalIndex([np.nan, np.nan])
|
| 337 |
+
|
| 338 |
+
assert not index.is_monotonic_increasing
|
| 339 |
+
assert not index._is_strictly_monotonic_increasing
|
| 340 |
+
assert not index.is_monotonic_increasing
|
| 341 |
+
assert not index._is_strictly_monotonic_decreasing
|
| 342 |
+
assert not index.is_monotonic_decreasing
|
| 343 |
+
|
| 344 |
+
@pytest.mark.parametrize(
    "breaks",
    [
        date_range("20180101", periods=4),
        date_range("20180101", periods=4, tz="US/Eastern"),
        timedelta_range("0 days", periods=4),
    ],
    ids=lambda x: str(x.dtype),
)
def test_maybe_convert_i8(self, breaks):
    """``_maybe_convert_i8`` maps datetime-like keys to their i8 values."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)
    convert = index._maybe_convert_i8
    first, second = breaks[0], breaks[1]

    # intervalindex
    tm.assert_index_equal(convert(index), IntervalIndex.from_breaks(breaks.asi8))

    # interval
    assert convert(Interval(first, second)) == Interval(
        first._value, second._value
    )

    # datetimelike index
    tm.assert_index_equal(convert(breaks), Index(breaks.asi8))

    # datetimelike scalar
    assert convert(first) == first._value

    # list-like of datetimelike scalars
    tm.assert_index_equal(convert(list(breaks)), Index(breaks.asi8))
|
| 382 |
+
|
| 383 |
+
@pytest.mark.parametrize(
    "breaks",
    [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
)
def test_maybe_convert_i8_nat(self, breaks):
    """Keys containing NaT are expected to convert to a float64 Index of NaN."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)

    all_nat = breaks._constructor([pd.NaT] * 3).as_unit("ns")
    all_nan = Index([np.nan] * 3, dtype=np.float64)
    tm.assert_index_equal(index._maybe_convert_i8(all_nat), all_nan)

    # Prepend one real value; its i8 value is expected as a float.
    mixed_key = all_nat.insert(0, breaks[0])
    mixed_expected = all_nan.insert(0, float(breaks[0]._value))
    tm.assert_index_equal(index._maybe_convert_i8(mixed_key), mixed_expected)
|
| 400 |
+
|
| 401 |
+
@pytest.mark.parametrize(
    "make_key",
    [lambda breaks: breaks, list],
    ids=["lambda", "list"],
)
def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype):
    """Numeric keys come back as an Index of the 64-bit dtype of their kind."""
    # GH 20636
    breaks = np.arange(5, dtype=any_real_numpy_dtype)
    key = make_key(breaks)
    converted = IntervalIndex.from_breaks(breaks)._maybe_convert_i8(key)

    # Map the dtype kind (signed / unsigned / float) to the expected dtype.
    dtype_by_kind = {"i": np.int64, "u": np.uint64, "f": np.float64}
    wanted = Index(key, dtype=dtype_by_kind[breaks.dtype.kind])
    tm.assert_index_equal(converted, wanted)
|
| 417 |
+
|
| 418 |
+
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks[0],
    ],
    ids=["IntervalIndex", "Interval", "scalar"],
)
def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype):
    """For these numeric keys the very same object must be returned."""
    # GH 20636
    breaks = np.arange(5, dtype=any_real_numpy_dtype)
    key = make_key(breaks)

    # test if _maybe_convert_i8 won't change key if an Interval or IntervalIndex
    converted = IntervalIndex.from_breaks(breaks)._maybe_convert_i8(key)
    assert converted is key
|
| 436 |
+
|
| 437 |
+
@pytest.mark.parametrize(
    "breaks1, breaks2",
    permutations(
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        2,
    ),
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks,
        lambda breaks: breaks[0],
        list,
    ],
    ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
)
def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
    """A key built from a different datetime-like dtype raises ValueError."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks1)
    key = make_key(breaks2)

    # The message embeds dtype reprs, so escape it before regex matching.
    pattern = re.escape(
        f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
        f"values of dtype {breaks2.dtype}"
    )
    with pytest.raises(ValueError, match=pattern):
        index._maybe_convert_i8(key)
|
| 472 |
+
|
| 473 |
+
def test_contains_method(self):
    """``contains`` reports, per interval, whether a point lies inside it."""
    # can select values that are IN the range of a value
    i = IntervalIndex.from_arrays([0, 1], [1, 2])

    outside = np.array([False, False], dtype="bool")
    in_first = np.array([True, False], dtype="bool")
    for point, hits in [(0, outside), (3, outside), (0.5, in_first), (1, in_first)]:
        tm.assert_numpy_array_equal(i.contains(point), hits)

    # __contains__ not implemented for "interval in interval", follow
    # that for the contains method for now
    with pytest.raises(
        NotImplementedError, match="contains not implemented for two"
    ):
        i.contains(Interval(0, 1))
|
| 495 |
+
|
| 496 |
+
def test_dropna(self, closed):
|
| 497 |
+
expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)
|
| 498 |
+
|
| 499 |
+
ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
|
| 500 |
+
result = ii.dropna()
|
| 501 |
+
tm.assert_index_equal(result, expected)
|
| 502 |
+
|
| 503 |
+
ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
|
| 504 |
+
result = ii.dropna()
|
| 505 |
+
tm.assert_index_equal(result, expected)
|
| 506 |
+
|
| 507 |
+
def test_non_contiguous(self, closed):
|
| 508 |
+
index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
|
| 509 |
+
target = [0.5, 1.5, 2.5]
|
| 510 |
+
actual = index.get_indexer(target)
|
| 511 |
+
expected = np.array([0, -1, 1], dtype="intp")
|
| 512 |
+
tm.assert_numpy_array_equal(actual, expected)
|
| 513 |
+
|
| 514 |
+
assert 1.5 not in index
|
| 515 |
+
|
| 516 |
+
def test_isin(self, closed):
    """``isin`` against IntervalIndex and list inputs, including other ``closed``."""
    index = self.create_index(closed=closed)
    size = len(index)

    only_first = np.array([True] + [False] * (size - 1))
    tm.assert_numpy_array_equal(index.isin(index[:1]), only_first)
    tm.assert_numpy_array_equal(index.isin([index[0]]), only_first)

    other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
    all_but_last = np.array([True] * (size - 1) + [False])
    for values in (other, other.tolist()):
        tm.assert_numpy_array_equal(index.isin(values), all_but_last)

    # Matches are expected only when ``closed`` agrees.
    for other_closed in ["right", "left", "both", "neither"]:
        other = self.create_index(closed=other_closed)
        expected = np.repeat(closed == other_closed, size)
        for values in (other, other.tolist()):
            tm.assert_numpy_array_equal(index.isin(values), expected)
|
| 542 |
+
|
| 543 |
+
def test_comparison(self):
    """Comparison operators between ``self.index`` and various operands."""
    second_only = np.array([False, True])
    both = np.array([True, True])
    none = np.array([False, False])

    # scalar Interval operands
    tm.assert_numpy_array_equal(Interval(0, 1) < self.index, second_only)
    tm.assert_numpy_array_equal(Interval(0.5, 1.5) < self.index, second_only)
    tm.assert_numpy_array_equal(self.index > Interval(0.5, 1.5), second_only)

    # the index against itself
    tm.assert_numpy_array_equal(self.index == self.index, both)
    tm.assert_numpy_array_equal(self.index <= self.index, both)
    tm.assert_numpy_array_equal(self.index >= self.index, both)
    tm.assert_numpy_array_equal(self.index < self.index, none)
    tm.assert_numpy_array_equal(self.index > self.index, none)

    # same breaks, different ``closed``
    left_closed = IntervalIndex.from_breaks([0, 1, 2], "left")
    tm.assert_numpy_array_equal(self.index == left_closed, none)

    # the index against its own ``.values``
    tm.assert_numpy_array_equal(self.index == self.index.values, np.array([True, True]))
    tm.assert_numpy_array_equal(self.index.values == self.index, np.array([True, True]))
    tm.assert_numpy_array_equal(self.index <= self.index.values, np.array([True, True]))
    tm.assert_numpy_array_equal(
        self.index != self.index.values, np.array([False, False])
    )
    tm.assert_numpy_array_equal(
        self.index > self.index.values, np.array([False, False])
    )
    tm.assert_numpy_array_equal(
        self.index.values > self.index, np.array([False, False])
    )

    # invalid comparisons
    tm.assert_numpy_array_equal(self.index == 0, np.array([False, False]))
    tm.assert_numpy_array_equal(
        self.index == self.index.left, np.array([False, False])
    )

    msg = "|".join(
        [
            "not supported between instances of 'int' and '.*.Interval'",
            r"Invalid comparison between dtype=interval\[int64, right\] and ",
        ]
    )
    with pytest.raises(TypeError, match=msg):
        self.index > 0
    with pytest.raises(TypeError, match=msg):
        self.index <= 0
    with pytest.raises(TypeError, match=msg):
        self.index > np.arange(2)

    with pytest.raises(ValueError, match="Lengths must match to compare"):
        self.index > np.arange(3)
|
| 606 |
+
|
| 607 |
+
def test_missing_values(self, closed):
    """NaN handling when building an index of intervals."""
    from_scalars = Index(
        [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
    )
    from_sides = IntervalIndex.from_arrays(
        [np.nan, 0, 1], [np.nan, 1, 2], closed=closed
    )
    assert from_scalars.equals(from_sides)

    # NaN on the left only must be rejected.
    msg = (
        "missing values must be missing in the same location both left "
        "and right sides"
    )
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_arrays(
            [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
        )

    tm.assert_numpy_array_equal(
        isna(from_scalars), np.array([True, False, False])
    )
|
| 624 |
+
|
| 625 |
+
def test_sort_values(self, closed):
    """``sort_values`` in both directions, then with a missing entry."""
    ordered = self.create_index(closed=closed)

    tm.assert_index_equal(ordered.sort_values(), ordered)
    tm.assert_index_equal(ordered.sort_values(ascending=False), ordered[::-1])

    # with nan
    shuffled = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

    nan_last = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
    tm.assert_index_equal(shuffled.sort_values(), nan_last)

    nan_first = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
    tm.assert_index_equal(
        shuffled.sort_values(ascending=False, na_position="first"), nan_first
    )
|
| 644 |
+
|
| 645 |
+
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_datetime(self, tz):
    """Midpoints, membership and ``get_indexer`` on daily datetime intervals."""

    def stamp(text):
        # Every timestamp in this test uses the parametrized timezone.
        return Timestamp(text, tz=tz)

    dates = date_range(start=stamp("2000-01-01"), periods=10)
    index = IntervalIndex.from_breaks(dates)

    # test mid
    noons = date_range(start=stamp("2000-01-01T12:00"), periods=9)
    tm.assert_index_equal(index.mid, noons)

    # __contains__ doesn't check individual points
    for point in ("2000-01-01", "2000-01-01T12", "2000-01-02"):
        assert stamp(point) not in index
    iv_true = Interval(stamp("2000-01-02"), stamp("2000-01-03"))
    iv_false = Interval(stamp("1999-12-31"), stamp("2000-01-01"))
    assert iv_true in index
    assert iv_false not in index

    # .contains does check individual points
    assert not index.contains(stamp("2000-01-01")).any()
    assert index.contains(stamp("2000-01-01T12")).any()
    assert index.contains(stamp("2000-01-02")).any()

    # test get_indexer
    target = date_range(start=stamp("1999-12-31T12:00"), periods=7, freq="12h")
    tm.assert_numpy_array_equal(
        index.get_indexer(target),
        np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp"),
    )

    target = date_range(start=stamp("2000-01-08T18:00"), periods=7, freq="6h")
    tm.assert_numpy_array_equal(
        index.get_indexer(target),
        np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp"),
    )
|
| 686 |
+
|
| 687 |
+
def test_append(self, closed):
|
| 688 |
+
index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
|
| 689 |
+
index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
|
| 690 |
+
|
| 691 |
+
result = index1.append(index2)
|
| 692 |
+
expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
|
| 693 |
+
tm.assert_index_equal(result, expected)
|
| 694 |
+
|
| 695 |
+
result = index1.append([index1, index2])
|
| 696 |
+
expected = IntervalIndex.from_arrays(
|
| 697 |
+
[0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
|
| 698 |
+
)
|
| 699 |
+
tm.assert_index_equal(result, expected)
|
| 700 |
+
|
| 701 |
+
for other_closed in {"left", "right", "both", "neither"} - {closed}:
|
| 702 |
+
index_other_closed = IntervalIndex.from_arrays(
|
| 703 |
+
[0, 1], [1, 2], closed=other_closed
|
| 704 |
+
)
|
| 705 |
+
result = index1.append(index_other_closed)
|
| 706 |
+
expected = index1.astype(object).append(index_other_closed.astype(object))
|
| 707 |
+
tm.assert_index_equal(result, expected)
|
| 708 |
+
|
| 709 |
+
def test_is_non_overlapping_monotonic(self, closed):
|
| 710 |
+
# Should be True in all cases
|
| 711 |
+
tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
|
| 712 |
+
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
| 713 |
+
assert idx.is_non_overlapping_monotonic is True
|
| 714 |
+
|
| 715 |
+
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
| 716 |
+
assert idx.is_non_overlapping_monotonic is True
|
| 717 |
+
|
| 718 |
+
# Should be False in all cases (overlapping)
|
| 719 |
+
tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
|
| 720 |
+
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
| 721 |
+
assert idx.is_non_overlapping_monotonic is False
|
| 722 |
+
|
| 723 |
+
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
| 724 |
+
assert idx.is_non_overlapping_monotonic is False
|
| 725 |
+
|
| 726 |
+
# Should be False in all cases (non-monotonic)
|
| 727 |
+
tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
|
| 728 |
+
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
| 729 |
+
assert idx.is_non_overlapping_monotonic is False
|
| 730 |
+
|
| 731 |
+
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
| 732 |
+
assert idx.is_non_overlapping_monotonic is False
|
| 733 |
+
|
| 734 |
+
# Should be False for closed='both', otherwise True (GH16560)
|
| 735 |
+
if closed == "both":
|
| 736 |
+
idx = IntervalIndex.from_breaks(range(4), closed=closed)
|
| 737 |
+
assert idx.is_non_overlapping_monotonic is False
|
| 738 |
+
else:
|
| 739 |
+
idx = IntervalIndex.from_breaks(range(4), closed=closed)
|
| 740 |
+
assert idx.is_non_overlapping_monotonic is True
|
| 741 |
+
|
| 742 |
+
@pytest.mark.parametrize(
    "start, shift, na_value",
    [
        (0, 1, np.nan),
        (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
        (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
    ],
)
def test_is_overlapping(self, start, shift, na_value, closed):
    """Interface checks for ``is_overlapping``, with and without NA entries."""
    # GH 23309
    # see test_interval_tree.py for extensive tests; interface tests here

    def spans(offsets, width):
        # Intervals starting ``n`` shifts after ``start``, ``width`` shifts long.
        return [(start + n * shift, start + (n + width) * shift) for n in offsets]

    def pad_na(tuples):
        # Put one all-NA tuple before and one after the given tuples.
        missing = (na_value, na_value)
        return [missing] + tuples + [missing]

    def overlapping(tuples):
        return IntervalIndex.from_tuples(tuples, closed=closed).is_overlapping

    # non-overlapping, then non-overlapping with NA
    tuples = spans((0, 2, 4), 1)
    assert overlapping(tuples) is False
    assert overlapping(pad_na(tuples)) is False

    # overlapping, then overlapping with NA
    tuples = spans(range(3), 2)
    assert overlapping(tuples) is True
    assert overlapping(pad_na(tuples)) is True

    # common endpoints, then common endpoints with NA
    tuples = spans(range(3), 1)
    expected = closed == "both"
    assert overlapping(tuples) is expected
    assert overlapping(pad_na(tuples)) is expected

    # intervals with duplicate left values
    a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85]
    b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
    index = IntervalIndex.from_arrays(a, b, closed="right")
    assert index.is_overlapping is False
|
| 794 |
+
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))),
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        ),
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        ),
    ],
)
def test_to_tuples(self, tuples):
    """``to_tuples`` gives back the tuples the index was built from."""
    # GH 18756
    round_tripped = IntervalIndex.from_tuples(tuples).to_tuples()
    wanted = Index(com.asarray_tuplesafe(tuples))
    tm.assert_index_equal(round_tripped, wanted)
|
| 818 |
+
|
| 819 |
+
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))) + [np.nan],
        list(
            zip(date_range("20170101", periods=10), date_range("20170101", periods=10))
        )
        + [np.nan],
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        )
        + [np.nan],
    ],
)
@pytest.mark.parametrize("na_tuple", [True, False])
def test_to_tuples_na(self, tuples, na_tuple):
    """Check ``to_tuples(na_tuple=...)`` on input whose last entry is NaN (GH 18756)."""
    converted = IntervalIndex.from_tuples(tuples).to_tuples(na_tuple=na_tuple)

    # everything except the trailing NA entry must match the input tuples
    tm.assert_index_equal(converted[:-1], Index(com.asarray_tuplesafe(tuples[:-1])))

    # the trailing NA entry is a scalar NA unless na_tuple asks for a 2-tuple
    trailing = converted[-1]
    if not na_tuple:
        assert isna(trailing)
        return
    assert isinstance(trailing, tuple)
    assert len(trailing) == 2
    assert all(isna(endpoint) for endpoint in trailing)
|
| 858 |
+
|
| 859 |
+
def test_nbytes(self):
|
| 860 |
+
# GH 19209
|
| 861 |
+
left = np.arange(0, 4, dtype="i8")
|
| 862 |
+
right = np.arange(1, 5, dtype="i8")
|
| 863 |
+
|
| 864 |
+
result = IntervalIndex.from_arrays(left, right).nbytes
|
| 865 |
+
expected = 64 # 4 * 8 * 2
|
| 866 |
+
assert result == expected
|
| 867 |
+
|
| 868 |
+
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
def test_set_closed(self, name, closed, new_closed):
    """``set_closed`` matches an index built directly with the new side (GH 21670)."""
    expected = interval_range(0, 5, closed=new_closed, name=name)
    original = interval_range(0, 5, closed=closed, name=name)
    tm.assert_index_equal(original.set_closed(new_closed), expected)
|
| 875 |
+
|
| 876 |
+
@pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
def test_set_closed_errors(self, bad_closed):
    """``set_closed`` raises ValueError for an unrecognised option (GH 21670)."""
    pattern = f"invalid option for 'closed': {bad_closed}"
    index = interval_range(0, 5)
    with pytest.raises(ValueError, match=pattern):
        index.set_closed(bad_closed)
|
| 883 |
+
|
| 884 |
+
def test_is_all_dates(self):
|
| 885 |
+
# GH 23576
|
| 886 |
+
year_2017 = Interval(
|
| 887 |
+
Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
|
| 888 |
+
)
|
| 889 |
+
year_2017_index = IntervalIndex([year_2017])
|
| 890 |
+
assert not year_2017_index._is_all_dates
|
| 891 |
+
|
| 892 |
+
|
| 893 |
+
def test_dir():
    """``dir`` on an IntervalIndex must not raise and must not list ``str`` (GH#27571)."""
    attributes = dir(IntervalIndex.from_arrays([0, 1], [1, 2]))
    assert "str" not in attributes
|
| 898 |
+
|
| 899 |
+
|
| 900 |
+
def test_searchsorted_different_argument_classes(listlike_box):
|
| 901 |
+
# https://github.com/pandas-dev/pandas/issues/32762
|
| 902 |
+
values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
|
| 903 |
+
result = values.searchsorted(listlike_box(values))
|
| 904 |
+
expected = np.array([0, 1], dtype=result.dtype)
|
| 905 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 906 |
+
|
| 907 |
+
result = values._data.searchsorted(listlike_box(values))
|
| 908 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 909 |
+
|
| 910 |
+
|
| 911 |
+
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    """``searchsorted`` raises TypeError when the targets are not intervals."""
    index = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    pattern = (
        "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    )
    with pytest.raises(TypeError, match=pattern):
        index.searchsorted(arg)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_range.py
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import timedelta
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas.core.dtypes.common import is_integer
|
| 7 |
+
|
| 8 |
+
from pandas import (
|
| 9 |
+
DateOffset,
|
| 10 |
+
Interval,
|
| 11 |
+
IntervalIndex,
|
| 12 |
+
Timedelta,
|
| 13 |
+
Timestamp,
|
| 14 |
+
date_range,
|
| 15 |
+
interval_range,
|
| 16 |
+
timedelta_range,
|
| 17 |
+
)
|
| 18 |
+
import pandas._testing as tm
|
| 19 |
+
|
| 20 |
+
from pandas.tseries.offsets import Day
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Index name to test with: either no name or ``"foo"``."""
    index_name = request.param
    return index_name
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class TestIntervalRange:
    """Tests for ``interval_range``: construction, subtype inference and errors."""

    @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
    def test_constructor_numeric(self, closed, name, freq, periods):
        """Every valid argument triple yields the same numeric index."""
        start, end = 0, 100
        breaks = np.arange(101, step=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        specs = (
            {"start": start, "end": end, "freq": freq},
            {"start": start, "periods": periods, "freq": freq},
            {"end": end, "periods": periods, "freq": freq},
            # GH 20976: linspace behavior defined from start/end/periods
            {"start": start, "end": end, "periods": periods},
        )
        for spec in specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    @pytest.mark.parametrize(
        "freq, periods", [("D", 364), ("2D", 182), ("22D18h", 16), ("ME", 11)]
    )
    def test_constructor_timestamp(self, closed, name, freq, periods, tz):
        """Every valid argument triple yields the same Timestamp index."""
        start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz)
        breaks = date_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        specs = [
            {"start": start, "end": end, "freq": freq},
            {"start": start, "periods": periods, "freq": freq},
            {"end": end, "periods": periods, "freq": freq},
        ]
        # GH 20976: linspace behavior defined from start/end/periods; only
        # checked for tz-naive input whose frequency multiple is not 1
        if breaks.freq.n != 1 and tz is None:
            specs.append({"start": start, "end": end, "periods": periods})

        for spec in specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq, periods", [("D", 100), ("2D12h", 40), ("5D", 20), ("25D", 4)]
    )
    def test_constructor_timedelta(self, closed, name, freq, periods):
        """Every valid argument triple yields the same Timedelta index."""
        start, end = Timedelta("0 days"), Timedelta("100 days")
        breaks = timedelta_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        specs = (
            {"start": start, "end": end, "freq": freq},
            {"start": start, "periods": periods, "freq": freq},
            {"end": end, "periods": periods, "freq": freq},
            # GH 20976: linspace behavior defined from start/end/periods
            {"start": start, "end": end, "periods": periods},
        )
        for spec in specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "start, end, freq, expected_endpoint",
        [
            (0, 10, 3, 9),
            (0, 10, 1.5, 9),
            (0.5, 10, 3, 9.5),
            (Timedelta("0D"), Timedelta("10D"), "2D4h", Timedelta("8D16h")),
            (
                Timestamp("2018-01-01"),
                Timestamp("2018-02-09"),
                "MS",
                Timestamp("2018-02-01"),
            ),
            (
                Timestamp("2018-01-01", tz="US/Eastern"),
                Timestamp("2018-01-20", tz="US/Eastern"),
                "5D12h",
                Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
            ),
        ],
    )
    def test_early_truncation(self, start, end, freq, expected_endpoint):
        """The index stops early when stepping by ``freq`` would skip ``end``."""
        last_right = interval_range(start=start, end=end, freq=freq).right[-1]
        assert last_right == expected_endpoint

    @pytest.mark.parametrize(
        "start, end, freq",
        [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
    )
    def test_no_invalid_float_truncation(self, start, end, freq):
        """Float endpoints are kept exactly as given (GH 21161)."""
        breaks = (
            [0.5, 1.5, 2.5, 3.5, 4.5] if freq is None else [0.5, 2.0, 3.5, 5.0, 6.5]
        )
        expected = IntervalIndex.from_breaks(breaks)

        result = interval_range(start=start, end=end, periods=4, freq=freq)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "start, mid, end",
        [
            (
                Timestamp("2018-03-10", tz="US/Eastern"),
                Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
                Timestamp("2018-03-12", tz="US/Eastern"),
            ),
            (
                Timestamp("2018-11-03", tz="US/Eastern"),
                Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
                Timestamp("2018-11-05", tz="US/Eastern"),
            ),
        ],
    )
    def test_linspace_dst_transition(self, start, mid, end):
        """Linspace construction across a DST change (GH 20976).

        The expected midpoint accounts for the hour gained/lost during the
        DST transition.
        """
        start, mid, end = (stamp.as_unit("ns") for stamp in (start, mid, end))
        expected = IntervalIndex.from_breaks([start, mid, end])
        result = interval_range(start=start, end=end, periods=2)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("freq", [2, 2.0])
    @pytest.mark.parametrize("end", [10, 10.0])
    @pytest.mark.parametrize("start", [0, 0.0])
    def test_float_subtype(self, start, end, freq):
        """Subtype is float if any supplied argument is float.

        This holds even if all resulting endpoints could safely be
        represented as integers.
        """
        # each case pairs the arguments with the sum of the values that decide
        # the subtype: the sum is an integer only if every addend is
        cases = (
            ({"start": start, "end": end, "freq": freq}, start + end + freq),
            ({"start": start, "periods": 5, "freq": freq}, start + freq),
            ({"end": end, "periods": 5, "freq": freq}, end + freq),
            # GH 20976: linspace behavior defined from start/end/periods
            ({"start": start, "end": end, "periods": 5}, start + end),
        )
        for kwargs, total in cases:
            subtype = interval_range(**kwargs).dtype.subtype
            wanted = "int64" if is_integer(total) else "float64"
            assert subtype == wanted

    def test_interval_range_fractional_period(self):
        """A float ``periods`` warns and gives the same index as ``periods=10``."""
        expected = interval_range(start=0, periods=10)
        pattern = "Non-integer 'periods' in pd.date_range, .* pd.interval_range"
        with tm.assert_produces_warning(FutureWarning, match=pattern):
            result = interval_range(start=0, periods=10.5)
        tm.assert_index_equal(result, expected)

    def test_constructor_coverage(self):
        """Equivalent spellings of endpoints and freq build the same index."""
        # equivalent timestamp-like start/end
        start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
        expected = interval_range(start=start, end=end)
        for lo, hi in (
            (start.to_pydatetime(), end.to_pydatetime()),
            (start.asm8, end.asm8),
        ):
            tm.assert_index_equal(interval_range(start=lo, end=hi), expected)

        # equivalent freq with timestamp
        for freq in (
            "D",
            Day(),
            Timedelta(days=1),
            timedelta(days=1),
            DateOffset(days=1),
        ):
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

        # equivalent timedelta-like start/end
        start, end = Timedelta(days=1), Timedelta(days=10)
        expected = interval_range(start=start, end=end)
        for lo, hi in (
            (start.to_pytimedelta(), end.to_pytimedelta()),
            (start.asm8, end.asm8),
        ):
            tm.assert_index_equal(interval_range(start=lo, end=hi), expected)

        # equivalent freq with timedelta
        for freq in ("D", Day(), Timedelta(days=1), timedelta(days=1)):
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

    def test_errors(self):
        """Invalid argument combinations raise the documented errors."""

        def check(exc_type, pattern, **kwargs):
            # one failing call per invocation, matched against ``pattern``
            with pytest.raises(exc_type, match=pattern):
                interval_range(**kwargs)

        # not enough params / too many params
        msg = (
            "Of the four parameters: start, end, periods, and freq, "
            "exactly three must be specified"
        )
        for kwargs in (
            {"start": 0},
            {"end": 5},
            {"periods": 2},
            {},
            {"start": 0, "end": 5, "periods": 6, "freq": 1.5},
        ):
            check(ValueError, msg, **kwargs)

        # mixed units
        msg = "start, end, freq need to be type compatible"
        for kwargs in (
            {"start": 0, "end": Timestamp("20130101"), "freq": 2},
            {"start": 0, "end": Timedelta("1 day"), "freq": 2},
            {"start": 0, "end": 10, "freq": "D"},
            {"start": Timestamp("20130101"), "end": 10, "freq": "D"},
            {"start": Timestamp("20130101"), "end": Timedelta("1 day"), "freq": "D"},
            {"start": Timestamp("20130101"), "end": Timestamp("20130110"), "freq": 2},
            {"start": Timedelta("1 day"), "end": 10, "freq": "D"},
            {"start": Timedelta("1 day"), "end": Timestamp("20130110"), "freq": "D"},
            {"start": Timedelta("1 day"), "end": Timedelta("10 days"), "freq": 2},
        ):
            check(TypeError, msg, **kwargs)

        # invalid periods
        check(TypeError, "periods must be a number, got foo", start=0, periods="foo")

        # invalid start
        check(
            ValueError,
            "start must be numeric or datetime-like, got foo",
            start="foo",
            periods=10,
        )

        # invalid end
        check(
            ValueError,
            r"end must be numeric or datetime-like, got \(0, 1\]",
            end=Interval(0, 1),
            periods=10,
        )

        # invalid freq for datetime-like
        msg = "freq must be numeric or convertible to DateOffset, got foo"
        for kwargs in (
            {"start": 0, "end": 10, "freq": "foo"},
            {"start": Timestamp("20130101"), "periods": 10, "freq": "foo"},
            {"end": Timedelta("1 day"), "periods": 10, "freq": "foo"},
        ):
            check(ValueError, msg, **kwargs)

        # mixed tz
        check(
            TypeError,
            "Start and end cannot both be tz-aware with different timezones",
            start=Timestamp("2017-01-01", tz="US/Eastern"),
            end=Timestamp("2017-01-07", tz="US/Pacific"),
        )

    def test_float_freq(self):
        """Float ``freq`` values produce the expected breaks (GH 54477)."""
        tenths = IntervalIndex.from_breaks([0 + 0.1 * n for n in range(11)])
        tm.assert_index_equal(interval_range(0, 1, freq=0.1), tenths)

        single = IntervalIndex.from_breaks([0, 0.6])
        tm.assert_index_equal(interval_range(0, 1, freq=0.6), single)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_tree.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from itertools import permutations
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas._libs.interval import IntervalTree
|
| 7 |
+
from pandas.compat import IS64
|
| 8 |
+
|
| 9 |
+
import pandas._testing as tm
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def skipif_32bit(param):
    """
    Wrap ``param`` so that a parametrize skips it when ``IS64`` is false.

    Used here for the leaf_size parameters related to GH 23440.
    """
    return pytest.param(
        param,
        marks=pytest.mark.skipif(
            not IS64, reason="GH 23440: int type mismatch on 32bit"
        ),
    )
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@pytest.fixture(params=["int64", "float64", "uint64"])
def dtype(request):
    """Dtype name to test with: one of the three listed in ``params``."""
    dtype_name = request.param
    return dtype_name
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Supply the IntervalTree ``leaf_size`` argument; consumed by the ``tree``
    fixture.
    """
    size = request.param
    return size
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@pytest.fixture(
    params=[
        np.arange(5, dtype="int64"),
        np.arange(5, dtype="uint64"),
        np.arange(5, dtype="float64"),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
    ]
)
def tree(request, leaf_size):
    """IntervalTree whose intervals span ``left`` to ``left + 2``."""
    lower = request.param
    upper = lower + 2
    return IntervalTree(lower, upper, leaf_size=leaf_size)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class TestIntervalTree:
    """Tests for ``IntervalTree`` lookups, overlap detection and construction."""

    def test_get_indexer(self, tree):
        """``get_indexer`` returns positions, or -1 for an unmatched target."""
        targets = np.array([1.0, 5.5, 6.5])
        tm.assert_numpy_array_equal(
            tree.get_indexer(targets), np.array([0, 4, -1], dtype="intp")
        )

        # 3.0 must trigger the non-unique lookup error
        pattern = "'indexer does not intersect a unique set of intervals'"
        with pytest.raises(KeyError, match=pattern):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        """A target outside the tree dtype's range is reported as not found."""
        lower = np.array([0, 1], dtype=dtype)
        upper = np.array([1, 2], dtype=dtype)
        target = np.array([target_value], dtype=target_dtype)

        result = IntervalTree(lower, upper).get_indexer(target)
        tm.assert_numpy_array_equal(result, np.array([-1], dtype="intp"))

    def test_get_indexer_non_unique(self, tree):
        """``get_indexer_non_unique`` returns all matches plus the missing targets."""

        def intp(values):
            return np.array(values, dtype="intp")

        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        tm.assert_numpy_array_equal(indexer[:1], intp([0]))
        # match order within one target is not asserted, hence the sort
        tm.assert_numpy_array_equal(np.sort(indexer[1:3]), intp([0, 1]))
        tm.assert_numpy_array_equal(np.sort(indexer[3:]), intp([-1]))
        tm.assert_numpy_array_equal(missing, intp([2]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        """An out-of-range target is both -1 in the indexer and listed as missing."""
        lower = np.array([0, 2], dtype=dtype)
        upper = np.array([1, 3], dtype=dtype)
        target = np.array([target_value], dtype=target_dtype)

        found, absent = IntervalTree(lower, upper).get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(found, np.array([-1], dtype="intp"))
        tm.assert_numpy_array_equal(absent, np.array([0], dtype="intp"))

    def test_duplicates(self, dtype):
        """Duplicate intervals fail ``get_indexer`` but not the non-unique lookup."""
        lower = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(lower, lower + 1)
        target = np.array([0.5])

        pattern = "'indexer does not intersect a unique set of intervals'"
        with pytest.raises(KeyError, match=pattern):
            tree.get_indexer(target)

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        tm.assert_numpy_array_equal(
            np.sort(indexer), np.array([0, 1, 2], dtype="intp")
        )
        tm.assert_numpy_array_equal(missing, np.array([], dtype="intp"))

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        """Endpoint targets are found only on the sides the tree is closed on."""
        x = np.arange(1000, dtype="float64")
        found = x.astype("intp")
        not_found = np.full(1000, -1, dtype="intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)

        # interior point, then left endpoint, then right endpoint
        for offset, is_hit in (
            (0.25, True),
            (0.0, tree.closed_left),
            (0.5, tree.closed_right),
        ):
            expected = found if is_hit else not_found
            tm.assert_numpy_array_equal(expected, tree.get_indexer(x + offset))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", [list(p) for p in permutations(range(3))])
    def test_is_overlapping(self, closed, order, left, right, expected):
        """``is_overlapping`` is the same for every ordering (GH 23309)."""
        reordered = IntervalTree(left[order], right[order], closed=closed)
        assert reordered.is_overlapping is expected

    @pytest.mark.parametrize("order", [list(p) for p in permutations(range(3))])
    def test_is_overlapping_endpoints(self, closed, order):
        """Intervals sharing only endpoints overlap iff closed is "both" (GH 23309)."""
        lower, upper = np.arange(3, dtype="int64"), np.arange(1, 4)
        reordered = IntervalTree(lower[order], upper[order], closed=closed)
        expected = closed == "both"
        assert reordered.is_overlapping is expected

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        """Empty, single-interval and all-NaN trees never overlap (GH 23309)."""
        assert IntervalTree(left, right, closed=closed).is_overlapping is False

    @pytest.mark.skipif(not IS64, reason="GH 23440")
    def test_construction_overflow(self):
        """The root pivot is computed without integer overflow (GH 25485)."""
        int64_max = np.iinfo(np.int64).max
        lower = np.arange(101, dtype="int64")
        upper = [int64_max] * 101
        tree = IntervalTree(lower, upper)

        # pivot should be average of left/right medians
        assert tree.root.pivot == (50 + int64_max) / 2

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            ([-np.inf, 1.0], [1.0, 2.0], 0.0),
            ([-np.inf, -2.0], [-2.0, -1.0], -2.0),
            ([-2.0, -1.0], [-1.0, np.inf], 0.0),
            ([1.0, 2.0], [2.0, np.inf], 2.0),
        ],
    )
    def test_inf_bound_infinite_recursion(self, left, right, expected):
        """Infinite bounds still build a tree with a finite root pivot (GH 46658)."""
        # the two-element lists are repeated 101 times before building the tree
        repeated_left = left * 101
        repeated_right = right * 101
        tree = IntervalTree(repeated_left, repeated_right)

        assert tree.root.pivot == expected
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_join.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
IntervalIndex,
|
| 5 |
+
MultiIndex,
|
| 6 |
+
RangeIndex,
|
| 7 |
+
)
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.fixture
|
| 12 |
+
def range_index():
|
| 13 |
+
return RangeIndex(3, name="range_index")
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@pytest.fixture
|
| 17 |
+
def interval_index():
|
| 18 |
+
return IntervalIndex.from_tuples(
|
| 19 |
+
[(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)], name="interval_index"
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index):
|
| 24 |
+
# GH-45661
|
| 25 |
+
multi_index = MultiIndex.from_product([interval_index, range_index])
|
| 26 |
+
result = multi_index.join(interval_index)
|
| 27 |
+
|
| 28 |
+
tm.assert_index_equal(result, multi_index)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index):
|
| 32 |
+
# GH-45661
|
| 33 |
+
multi_index = MultiIndex.from_product([interval_index, range_index])
|
| 34 |
+
result = interval_index.join(multi_index)
|
| 35 |
+
|
| 36 |
+
tm.assert_index_equal(result, multi_index)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def test_join_overlapping_interval_to_another_intervalindex(interval_index):
|
| 40 |
+
# GH-45661
|
| 41 |
+
flipped_interval_index = interval_index[::-1]
|
| 42 |
+
result = interval_index.join(flipped_interval_index)
|
| 43 |
+
|
| 44 |
+
tm.assert_index_equal(result, interval_index)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_pickle.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas import IntervalIndex
|
| 4 |
+
import pandas._testing as tm
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class TestPickle:
|
| 8 |
+
@pytest.mark.parametrize("closed", ["left", "right", "both"])
|
| 9 |
+
def test_pickle_round_trip_closed(self, closed):
|
| 10 |
+
# https://github.com/pandas-dev/pandas/issues/35658
|
| 11 |
+
idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
|
| 12 |
+
result = tm.round_trip_pickle(idx)
|
| 13 |
+
tm.assert_index_equal(result, idx)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_setops.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Index,
|
| 6 |
+
IntervalIndex,
|
| 7 |
+
Timestamp,
|
| 8 |
+
interval_range,
|
| 9 |
+
)
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def monotonic_index(start, end, dtype="int64", closed="right"):
|
| 14 |
+
return IntervalIndex.from_breaks(np.arange(start, end, dtype=dtype), closed=closed)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def empty_index(dtype="int64", closed="right"):
|
| 18 |
+
return IntervalIndex(np.array([], dtype=dtype), closed=closed)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class TestIntervalIndex:
|
| 22 |
+
def test_union(self, closed, sort):
|
| 23 |
+
index = monotonic_index(0, 11, closed=closed)
|
| 24 |
+
other = monotonic_index(5, 13, closed=closed)
|
| 25 |
+
|
| 26 |
+
expected = monotonic_index(0, 13, closed=closed)
|
| 27 |
+
result = index[::-1].union(other, sort=sort)
|
| 28 |
+
if sort in (None, True):
|
| 29 |
+
tm.assert_index_equal(result, expected)
|
| 30 |
+
else:
|
| 31 |
+
tm.assert_index_equal(result.sort_values(), expected)
|
| 32 |
+
|
| 33 |
+
result = other[::-1].union(index, sort=sort)
|
| 34 |
+
if sort in (None, True):
|
| 35 |
+
tm.assert_index_equal(result, expected)
|
| 36 |
+
else:
|
| 37 |
+
tm.assert_index_equal(result.sort_values(), expected)
|
| 38 |
+
|
| 39 |
+
tm.assert_index_equal(index.union(index, sort=sort), index)
|
| 40 |
+
tm.assert_index_equal(index.union(index[:1], sort=sort), index)
|
| 41 |
+
|
| 42 |
+
def test_union_empty_result(self, closed, sort):
    """Check that unions of empty IntervalIndexes stay empty with the expected dtype.

    ``closed`` is forwarded to ``empty_index`` and ``sort`` to ``union``;
    both are test parameters (presumably pytest fixtures defined outside
    this file -- confirm against the conftest).
    """
    # GH 19101: empty result, same dtype
    index = empty_index(dtype="int64", closed=closed)
    result = index.union(index, sort=sort)
    tm.assert_index_equal(result, index)

    # GH 19101: empty result, different numeric dtypes -> common dtype is f8
    other = empty_index(dtype="float64", closed=closed)
    result = index.union(other, sort=sort)
    expected = other
    tm.assert_index_equal(result, expected)

    # Same int64/float64 pair with the operands swapped, so that the
    # float64-first direction is exercised as well (mirrors the uint64
    # stanza below).
    result = other.union(index, sort=sort)
    tm.assert_index_equal(result, expected)

    other = empty_index(dtype="uint64", closed=closed)
    result = index.union(other, sort=sort)
    tm.assert_index_equal(result, expected)

    result = other.union(index, sort=sort)
    tm.assert_index_equal(result, expected)
|
| 63 |
+
|
| 64 |
+
def test_intersection(self, closed, sort):
|
| 65 |
+
index = monotonic_index(0, 11, closed=closed)
|
| 66 |
+
other = monotonic_index(5, 13, closed=closed)
|
| 67 |
+
|
| 68 |
+
expected = monotonic_index(5, 11, closed=closed)
|
| 69 |
+
result = index[::-1].intersection(other, sort=sort)
|
| 70 |
+
if sort in (None, True):
|
| 71 |
+
tm.assert_index_equal(result, expected)
|
| 72 |
+
else:
|
| 73 |
+
tm.assert_index_equal(result.sort_values(), expected)
|
| 74 |
+
|
| 75 |
+
result = other[::-1].intersection(index, sort=sort)
|
| 76 |
+
if sort in (None, True):
|
| 77 |
+
tm.assert_index_equal(result, expected)
|
| 78 |
+
else:
|
| 79 |
+
tm.assert_index_equal(result.sort_values(), expected)
|
| 80 |
+
|
| 81 |
+
tm.assert_index_equal(index.intersection(index, sort=sort), index)
|
| 82 |
+
|
| 83 |
+
# GH 26225: nested intervals
|
| 84 |
+
index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)])
|
| 85 |
+
other = IntervalIndex.from_tuples([(1, 2), (1, 3)])
|
| 86 |
+
expected = IntervalIndex.from_tuples([(1, 2), (1, 3)])
|
| 87 |
+
result = index.intersection(other)
|
| 88 |
+
tm.assert_index_equal(result, expected)
|
| 89 |
+
|
| 90 |
+
# GH 26225
|
| 91 |
+
index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
|
| 92 |
+
other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
|
| 93 |
+
expected = IntervalIndex.from_tuples([(0, 2)])
|
| 94 |
+
result = index.intersection(other)
|
| 95 |
+
tm.assert_index_equal(result, expected)
|
| 96 |
+
|
| 97 |
+
# GH 26225: duplicate nan element
|
| 98 |
+
index = IntervalIndex([np.nan, np.nan])
|
| 99 |
+
other = IntervalIndex([np.nan])
|
| 100 |
+
expected = IntervalIndex([np.nan])
|
| 101 |
+
result = index.intersection(other)
|
| 102 |
+
tm.assert_index_equal(result, expected)
|
| 103 |
+
|
| 104 |
+
def test_intersection_empty_result(self, closed, sort):
|
| 105 |
+
index = monotonic_index(0, 11, closed=closed)
|
| 106 |
+
|
| 107 |
+
# GH 19101: empty result, same dtype
|
| 108 |
+
other = monotonic_index(300, 314, closed=closed)
|
| 109 |
+
expected = empty_index(dtype="int64", closed=closed)
|
| 110 |
+
result = index.intersection(other, sort=sort)
|
| 111 |
+
tm.assert_index_equal(result, expected)
|
| 112 |
+
|
| 113 |
+
# GH 19101: empty result, different numeric dtypes -> common dtype is float64
|
| 114 |
+
other = monotonic_index(300, 314, dtype="float64", closed=closed)
|
| 115 |
+
result = index.intersection(other, sort=sort)
|
| 116 |
+
expected = other[:0]
|
| 117 |
+
tm.assert_index_equal(result, expected)
|
| 118 |
+
|
| 119 |
+
other = monotonic_index(300, 314, dtype="uint64", closed=closed)
|
| 120 |
+
result = index.intersection(other, sort=sort)
|
| 121 |
+
tm.assert_index_equal(result, expected)
|
| 122 |
+
|
| 123 |
+
def test_intersection_duplicates(self):
|
| 124 |
+
# GH#38743
|
| 125 |
+
index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
|
| 126 |
+
other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
|
| 127 |
+
expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
|
| 128 |
+
result = index.intersection(other)
|
| 129 |
+
tm.assert_index_equal(result, expected)
|
| 130 |
+
|
| 131 |
+
def test_difference(self, closed, sort):
|
| 132 |
+
index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
|
| 133 |
+
result = index.difference(index[:1], sort=sort)
|
| 134 |
+
expected = index[1:]
|
| 135 |
+
if sort is None:
|
| 136 |
+
expected = expected.sort_values()
|
| 137 |
+
tm.assert_index_equal(result, expected)
|
| 138 |
+
|
| 139 |
+
# GH 19101: empty result, same dtype
|
| 140 |
+
result = index.difference(index, sort=sort)
|
| 141 |
+
expected = empty_index(dtype="int64", closed=closed)
|
| 142 |
+
tm.assert_index_equal(result, expected)
|
| 143 |
+
|
| 144 |
+
# GH 19101: empty result, different dtypes
|
| 145 |
+
other = IntervalIndex.from_arrays(
|
| 146 |
+
index.left.astype("float64"), index.right, closed=closed
|
| 147 |
+
)
|
| 148 |
+
result = index.difference(other, sort=sort)
|
| 149 |
+
tm.assert_index_equal(result, expected)
|
| 150 |
+
|
| 151 |
+
def test_symmetric_difference(self, closed, sort):
|
| 152 |
+
index = monotonic_index(0, 11, closed=closed)
|
| 153 |
+
result = index[1:].symmetric_difference(index[:-1], sort=sort)
|
| 154 |
+
expected = IntervalIndex([index[0], index[-1]])
|
| 155 |
+
if sort in (None, True):
|
| 156 |
+
tm.assert_index_equal(result, expected)
|
| 157 |
+
else:
|
| 158 |
+
tm.assert_index_equal(result.sort_values(), expected)
|
| 159 |
+
|
| 160 |
+
# GH 19101: empty result, same dtype
|
| 161 |
+
result = index.symmetric_difference(index, sort=sort)
|
| 162 |
+
expected = empty_index(dtype="int64", closed=closed)
|
| 163 |
+
if sort in (None, True):
|
| 164 |
+
tm.assert_index_equal(result, expected)
|
| 165 |
+
else:
|
| 166 |
+
tm.assert_index_equal(result.sort_values(), expected)
|
| 167 |
+
|
| 168 |
+
# GH 19101: empty result, different dtypes
|
| 169 |
+
other = IntervalIndex.from_arrays(
|
| 170 |
+
index.left.astype("float64"), index.right, closed=closed
|
| 171 |
+
)
|
| 172 |
+
result = index.symmetric_difference(other, sort=sort)
|
| 173 |
+
expected = empty_index(dtype="float64", closed=closed)
|
| 174 |
+
tm.assert_index_equal(result, expected)
|
| 175 |
+
|
| 176 |
+
@pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning")
|
| 177 |
+
@pytest.mark.parametrize(
|
| 178 |
+
"op_name", ["union", "intersection", "difference", "symmetric_difference"]
|
| 179 |
+
)
|
| 180 |
+
def test_set_incompatible_types(self, closed, op_name, sort):
|
| 181 |
+
index = monotonic_index(0, 11, closed=closed)
|
| 182 |
+
set_op = getattr(index, op_name)
|
| 183 |
+
|
| 184 |
+
# TODO: standardize return type of non-union setops type(self vs other)
|
| 185 |
+
# non-IntervalIndex
|
| 186 |
+
if op_name == "difference":
|
| 187 |
+
expected = index
|
| 188 |
+
else:
|
| 189 |
+
expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
|
| 190 |
+
result = set_op(Index([1, 2, 3]), sort=sort)
|
| 191 |
+
tm.assert_index_equal(result, expected)
|
| 192 |
+
|
| 193 |
+
# mixed closed -> cast to object
|
| 194 |
+
for other_closed in {"right", "left", "both", "neither"} - {closed}:
|
| 195 |
+
other = monotonic_index(0, 11, closed=other_closed)
|
| 196 |
+
expected = getattr(index.astype(object), op_name)(other, sort=sort)
|
| 197 |
+
if op_name == "difference":
|
| 198 |
+
expected = index
|
| 199 |
+
result = set_op(other, sort=sort)
|
| 200 |
+
tm.assert_index_equal(result, expected)
|
| 201 |
+
|
| 202 |
+
# GH 19016: incompatible dtypes -> cast to object
|
| 203 |
+
other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
|
| 204 |
+
expected = getattr(index.astype(object), op_name)(other, sort=sort)
|
| 205 |
+
if op_name == "difference":
|
| 206 |
+
expected = index
|
| 207 |
+
result = set_op(other, sort=sort)
|
| 208 |
+
tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/conftest.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Index,
|
| 6 |
+
MultiIndex,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Note: identical to the "multi" entry in the top-level "index" fixture
|
| 11 |
+
@pytest.fixture
|
| 12 |
+
def idx():
|
| 13 |
+
# a MultiIndex used to test the
|
| 14 |
+
# general functionality of this object
|
| 15 |
+
major_axis = Index(["foo", "bar", "baz", "qux"])
|
| 16 |
+
minor_axis = Index(["one", "two"])
|
| 17 |
+
|
| 18 |
+
major_codes = np.array([0, 0, 1, 2, 3, 3])
|
| 19 |
+
minor_codes = np.array([0, 1, 0, 1, 0, 1])
|
| 20 |
+
index_names = ["first", "second"]
|
| 21 |
+
mi = MultiIndex(
|
| 22 |
+
levels=[major_axis, minor_axis],
|
| 23 |
+
codes=[major_codes, minor_codes],
|
| 24 |
+
names=index_names,
|
| 25 |
+
verify_integrity=False,
|
| 26 |
+
)
|
| 27 |
+
return mi
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_analytics.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pandas import (
|
| 6 |
+
Index,
|
| 7 |
+
MultiIndex,
|
| 8 |
+
date_range,
|
| 9 |
+
period_range,
|
| 10 |
+
)
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test_infer_objects(idx):
|
| 15 |
+
with pytest.raises(NotImplementedError, match="to_frame"):
|
| 16 |
+
idx.infer_objects()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def test_shift(idx):
|
| 20 |
+
# GH8083 test the base class for shift
|
| 21 |
+
msg = (
|
| 22 |
+
"This method is only implemented for DatetimeIndex, PeriodIndex and "
|
| 23 |
+
"TimedeltaIndex; Got type MultiIndex"
|
| 24 |
+
)
|
| 25 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 26 |
+
idx.shift(1)
|
| 27 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 28 |
+
idx.shift(1, 2)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def test_groupby(idx):
|
| 32 |
+
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
|
| 33 |
+
labels = idx.tolist()
|
| 34 |
+
exp = {1: labels[:3], 2: labels[3:]}
|
| 35 |
+
tm.assert_dict_equal(groups, exp)
|
| 36 |
+
|
| 37 |
+
# GH5620
|
| 38 |
+
groups = idx.groupby(idx)
|
| 39 |
+
exp = {key: [key] for key in idx}
|
| 40 |
+
tm.assert_dict_equal(groups, exp)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def test_truncate_multiindex():
    """Check ``MultiIndex.truncate`` trims the first level and keeps the level names."""
    # GH 34564 for MultiIndex level names check
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=["L1", "L2"],
    )

    result = index.truncate(before=1)
    # The first level holds the integers 0..3, so the label removed by
    # ``before=1`` is 0; checking for a string label would always pass.
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    # positional arguments are (before, after); after < before must be rejected
    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# TODO: reshape
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def test_reorder_levels(idx):
|
| 80 |
+
# this blows up
|
| 81 |
+
with pytest.raises(IndexError, match="^Too many levels"):
|
| 82 |
+
idx.reorder_levels([2, 1, 0])
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def test_numpy_repeat():
|
| 86 |
+
reps = 2
|
| 87 |
+
numbers = [1, 2, 3]
|
| 88 |
+
names = np.array(["foo", "bar"])
|
| 89 |
+
|
| 90 |
+
m = MultiIndex.from_product([numbers, names], names=names)
|
| 91 |
+
expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
|
| 92 |
+
tm.assert_index_equal(np.repeat(m, reps), expected)
|
| 93 |
+
|
| 94 |
+
msg = "the 'axis' parameter is not supported"
|
| 95 |
+
with pytest.raises(ValueError, match=msg):
|
| 96 |
+
np.repeat(m, reps, axis=1)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def test_append_mixed_dtypes():
|
| 100 |
+
# GH 13660
|
| 101 |
+
dti = date_range("2011-01-01", freq="ME", periods=3)
|
| 102 |
+
dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern")
|
| 103 |
+
pi = period_range("2011-01", freq="M", periods=3)
|
| 104 |
+
|
| 105 |
+
mi = MultiIndex.from_arrays(
|
| 106 |
+
[[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
|
| 107 |
+
)
|
| 108 |
+
assert mi.nlevels == 6
|
| 109 |
+
|
| 110 |
+
res = mi.append(mi)
|
| 111 |
+
exp = MultiIndex.from_arrays(
|
| 112 |
+
[
|
| 113 |
+
[1, 2, 3, 1, 2, 3],
|
| 114 |
+
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
|
| 115 |
+
["a", "b", "c", "a", "b", "c"],
|
| 116 |
+
dti.append(dti),
|
| 117 |
+
dti_tz.append(dti_tz),
|
| 118 |
+
pi.append(pi),
|
| 119 |
+
]
|
| 120 |
+
)
|
| 121 |
+
tm.assert_index_equal(res, exp)
|
| 122 |
+
|
| 123 |
+
other = MultiIndex.from_arrays(
|
| 124 |
+
[
|
| 125 |
+
["x", "y", "z"],
|
| 126 |
+
["x", "y", "z"],
|
| 127 |
+
["x", "y", "z"],
|
| 128 |
+
["x", "y", "z"],
|
| 129 |
+
["x", "y", "z"],
|
| 130 |
+
["x", "y", "z"],
|
| 131 |
+
]
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
res = mi.append(other)
|
| 135 |
+
exp = MultiIndex.from_arrays(
|
| 136 |
+
[
|
| 137 |
+
[1, 2, 3, "x", "y", "z"],
|
| 138 |
+
[1.1, np.nan, 3.3, "x", "y", "z"],
|
| 139 |
+
["a", "b", "c", "x", "y", "z"],
|
| 140 |
+
dti.append(Index(["x", "y", "z"])),
|
| 141 |
+
dti_tz.append(Index(["x", "y", "z"])),
|
| 142 |
+
pi.append(Index(["x", "y", "z"])),
|
| 143 |
+
]
|
| 144 |
+
)
|
| 145 |
+
tm.assert_index_equal(res, exp)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def test_iter(idx):
|
| 149 |
+
result = list(idx)
|
| 150 |
+
expected = [
|
| 151 |
+
("foo", "one"),
|
| 152 |
+
("foo", "two"),
|
| 153 |
+
("bar", "one"),
|
| 154 |
+
("baz", "two"),
|
| 155 |
+
("qux", "one"),
|
| 156 |
+
("qux", "two"),
|
| 157 |
+
]
|
| 158 |
+
assert result == expected
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def test_sub(idx):
|
| 162 |
+
first = idx
|
| 163 |
+
|
| 164 |
+
# - now raises (previously was set op difference)
|
| 165 |
+
msg = "cannot perform __sub__ with this index type: MultiIndex"
|
| 166 |
+
with pytest.raises(TypeError, match=msg):
|
| 167 |
+
first - idx[-3:]
|
| 168 |
+
with pytest.raises(TypeError, match=msg):
|
| 169 |
+
idx[-3:] - first
|
| 170 |
+
with pytest.raises(TypeError, match=msg):
|
| 171 |
+
idx[-3:] - first.tolist()
|
| 172 |
+
msg = "cannot perform __rsub__ with this index type: MultiIndex"
|
| 173 |
+
with pytest.raises(TypeError, match=msg):
|
| 174 |
+
first.tolist() - idx[-3:]
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def test_map(idx):
|
| 178 |
+
# callable
|
| 179 |
+
index = idx
|
| 180 |
+
|
| 181 |
+
result = index.map(lambda x: x)
|
| 182 |
+
tm.assert_index_equal(result, index)
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
@pytest.mark.parametrize(
|
| 186 |
+
"mapper",
|
| 187 |
+
[
|
| 188 |
+
lambda values, idx: {i: e for e, i in zip(values, idx)},
|
| 189 |
+
lambda values, idx: pd.Series(values, idx),
|
| 190 |
+
],
|
| 191 |
+
)
|
| 192 |
+
def test_map_dictlike(idx, mapper):
|
| 193 |
+
identity = mapper(idx.values, idx)
|
| 194 |
+
|
| 195 |
+
# we don't infer to uint64 dtype for a dict
|
| 196 |
+
if idx.dtype == np.uint64 and isinstance(identity, dict):
|
| 197 |
+
expected = idx.astype("int64")
|
| 198 |
+
else:
|
| 199 |
+
expected = idx
|
| 200 |
+
|
| 201 |
+
result = idx.map(identity)
|
| 202 |
+
tm.assert_index_equal(result, expected)
|
| 203 |
+
|
| 204 |
+
# empty mappable
|
| 205 |
+
expected = Index([np.nan] * len(idx))
|
| 206 |
+
result = idx.map(mapper(expected, idx))
|
| 207 |
+
tm.assert_index_equal(result, expected)
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
@pytest.mark.parametrize(
|
| 211 |
+
"func",
|
| 212 |
+
[
|
| 213 |
+
np.exp,
|
| 214 |
+
np.exp2,
|
| 215 |
+
np.expm1,
|
| 216 |
+
np.log,
|
| 217 |
+
np.log2,
|
| 218 |
+
np.log10,
|
| 219 |
+
np.log1p,
|
| 220 |
+
np.sqrt,
|
| 221 |
+
np.sin,
|
| 222 |
+
np.cos,
|
| 223 |
+
np.tan,
|
| 224 |
+
np.arcsin,
|
| 225 |
+
np.arccos,
|
| 226 |
+
np.arctan,
|
| 227 |
+
np.sinh,
|
| 228 |
+
np.cosh,
|
| 229 |
+
np.tanh,
|
| 230 |
+
np.arcsinh,
|
| 231 |
+
np.arccosh,
|
| 232 |
+
np.arctanh,
|
| 233 |
+
np.deg2rad,
|
| 234 |
+
np.rad2deg,
|
| 235 |
+
],
|
| 236 |
+
ids=lambda func: func.__name__,
|
| 237 |
+
)
|
| 238 |
+
def test_numpy_ufuncs(idx, func):
|
| 239 |
+
# test ufuncs of numpy. see:
|
| 240 |
+
# https://numpy.org/doc/stable/reference/ufuncs.html
|
| 241 |
+
|
| 242 |
+
expected_exception = TypeError
|
| 243 |
+
msg = (
|
| 244 |
+
"loop of ufunc does not support argument 0 of type tuple which "
|
| 245 |
+
f"has no callable {func.__name__} method"
|
| 246 |
+
)
|
| 247 |
+
with pytest.raises(expected_exception, match=msg):
|
| 248 |
+
func(idx)
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
@pytest.mark.parametrize(
|
| 252 |
+
"func",
|
| 253 |
+
[np.isfinite, np.isinf, np.isnan, np.signbit],
|
| 254 |
+
ids=lambda func: func.__name__,
|
| 255 |
+
)
|
| 256 |
+
def test_numpy_type_funcs(idx, func):
|
| 257 |
+
msg = (
|
| 258 |
+
f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
|
| 259 |
+
"could not be safely coerced to any supported types according to "
|
| 260 |
+
"the casting rule ''safe''"
|
| 261 |
+
)
|
| 262 |
+
with pytest.raises(TypeError, match=msg):
|
| 263 |
+
func(idx)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_astype.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.core.dtypes.dtypes import CategoricalDtype
|
| 5 |
+
|
| 6 |
+
import pandas._testing as tm
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def test_astype(idx):
|
| 10 |
+
expected = idx.copy()
|
| 11 |
+
actual = idx.astype("O")
|
| 12 |
+
tm.assert_copy(actual.levels, expected.levels)
|
| 13 |
+
tm.assert_copy(actual.codes, expected.codes)
|
| 14 |
+
assert actual.names == list(expected.names)
|
| 15 |
+
|
| 16 |
+
with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
|
| 17 |
+
idx.astype(np.dtype(int))
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@pytest.mark.parametrize("ordered", [True, False])
|
| 21 |
+
def test_astype_category(idx, ordered):
|
| 22 |
+
# GH 18630
|
| 23 |
+
msg = "> 1 ndim Categorical are not supported at this time"
|
| 24 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 25 |
+
idx.astype(CategoricalDtype(ordered=ordered))
|
| 26 |
+
|
| 27 |
+
if ordered is False:
|
| 28 |
+
# dtype='category' defaults to ordered=False, so only test once
|
| 29 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 30 |
+
idx.astype("category")
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_constructors.py
ADDED
|
@@ -0,0 +1,860 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import (
|
| 2 |
+
date,
|
| 3 |
+
datetime,
|
| 4 |
+
)
|
| 5 |
+
import itertools
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pytest
|
| 9 |
+
|
| 10 |
+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
| 11 |
+
|
| 12 |
+
import pandas as pd
|
| 13 |
+
from pandas import (
|
| 14 |
+
Index,
|
| 15 |
+
MultiIndex,
|
| 16 |
+
Series,
|
| 17 |
+
Timestamp,
|
| 18 |
+
date_range,
|
| 19 |
+
)
|
| 20 |
+
import pandas._testing as tm
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def test_constructor_single_level():
|
| 24 |
+
result = MultiIndex(
|
| 25 |
+
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
|
| 26 |
+
)
|
| 27 |
+
assert isinstance(result, MultiIndex)
|
| 28 |
+
expected = Index(["foo", "bar", "baz", "qux"], name="first")
|
| 29 |
+
tm.assert_index_equal(result.levels[0], expected)
|
| 30 |
+
assert result.names == ["first"]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def test_constructor_no_levels():
|
| 34 |
+
msg = "non-zero number of levels/codes"
|
| 35 |
+
with pytest.raises(ValueError, match=msg):
|
| 36 |
+
MultiIndex(levels=[], codes=[])
|
| 37 |
+
|
| 38 |
+
msg = "Must pass both levels and codes"
|
| 39 |
+
with pytest.raises(TypeError, match=msg):
|
| 40 |
+
MultiIndex(levels=[])
|
| 41 |
+
with pytest.raises(TypeError, match=msg):
|
| 42 |
+
MultiIndex(codes=[])
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def test_constructor_nonhashable_names():
|
| 46 |
+
# GH 20527
|
| 47 |
+
levels = [[1, 2], ["one", "two"]]
|
| 48 |
+
codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
|
| 49 |
+
names = (["foo"], ["bar"])
|
| 50 |
+
msg = r"MultiIndex\.name must be a hashable type"
|
| 51 |
+
with pytest.raises(TypeError, match=msg):
|
| 52 |
+
MultiIndex(levels=levels, codes=codes, names=names)
|
| 53 |
+
|
| 54 |
+
# With .rename()
|
| 55 |
+
mi = MultiIndex(
|
| 56 |
+
levels=[[1, 2], ["one", "two"]],
|
| 57 |
+
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
|
| 58 |
+
names=("foo", "bar"),
|
| 59 |
+
)
|
| 60 |
+
renamed = [["fooo"], ["barr"]]
|
| 61 |
+
with pytest.raises(TypeError, match=msg):
|
| 62 |
+
mi.rename(names=renamed)
|
| 63 |
+
|
| 64 |
+
# With .set_names()
|
| 65 |
+
with pytest.raises(TypeError, match=msg):
|
| 66 |
+
mi.set_names(names=renamed)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def test_constructor_mismatched_codes_levels(idx):
|
| 70 |
+
codes = [np.array([1]), np.array([2]), np.array([3])]
|
| 71 |
+
levels = ["a"]
|
| 72 |
+
|
| 73 |
+
msg = "Length of levels and codes must be the same"
|
| 74 |
+
with pytest.raises(ValueError, match=msg):
|
| 75 |
+
MultiIndex(levels=levels, codes=codes)
|
| 76 |
+
|
| 77 |
+
length_error = (
|
| 78 |
+
r"On level 0, code max \(3\) >= length of level \(1\)\. "
|
| 79 |
+
"NOTE: this index is in an inconsistent state"
|
| 80 |
+
)
|
| 81 |
+
label_error = r"Unequal code lengths: \[4, 2\]"
|
| 82 |
+
code_value_error = r"On level 0, code value \(-2\) < -1"
|
| 83 |
+
|
| 84 |
+
# important to check that it's looking at the right thing.
|
| 85 |
+
with pytest.raises(ValueError, match=length_error):
|
| 86 |
+
MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
|
| 87 |
+
|
| 88 |
+
with pytest.raises(ValueError, match=label_error):
|
| 89 |
+
MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])
|
| 90 |
+
|
| 91 |
+
# external API
|
| 92 |
+
with pytest.raises(ValueError, match=length_error):
|
| 93 |
+
idx.copy().set_levels([["a"], ["b"]])
|
| 94 |
+
|
| 95 |
+
with pytest.raises(ValueError, match=label_error):
|
| 96 |
+
idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
|
| 97 |
+
|
| 98 |
+
# test set_codes with verify_integrity=False
|
| 99 |
+
# the setting should not raise any value error
|
| 100 |
+
idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)
|
| 101 |
+
|
| 102 |
+
# code value smaller than -1
|
| 103 |
+
with pytest.raises(ValueError, match=code_value_error):
|
| 104 |
+
MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def test_na_levels():
|
| 108 |
+
# GH26408
|
| 109 |
+
# test if codes are re-assigned value -1 for levels
|
| 110 |
+
# with missing values (NaN, NaT, None)
|
| 111 |
+
result = MultiIndex(
|
| 112 |
+
levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]]
|
| 113 |
+
)
|
| 114 |
+
expected = MultiIndex(
|
| 115 |
+
levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]]
|
| 116 |
+
)
|
| 117 |
+
tm.assert_index_equal(result, expected)
|
| 118 |
+
|
| 119 |
+
result = MultiIndex(
|
| 120 |
+
levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]]
|
| 121 |
+
)
|
| 122 |
+
expected = MultiIndex(
|
| 123 |
+
levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]]
|
| 124 |
+
)
|
| 125 |
+
tm.assert_index_equal(result, expected)
|
| 126 |
+
|
| 127 |
+
# verify set_levels and set_codes
|
| 128 |
+
result = MultiIndex(
|
| 129 |
+
levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]
|
| 130 |
+
).set_levels([[np.nan, "s", pd.NaT, 128, None]])
|
| 131 |
+
tm.assert_index_equal(result, expected)
|
| 132 |
+
|
| 133 |
+
result = MultiIndex(
|
| 134 |
+
levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]]
|
| 135 |
+
).set_codes([[0, -1, 1, 2, 3, 4]])
|
| 136 |
+
tm.assert_index_equal(result, expected)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def test_copy_in_constructor():
|
| 140 |
+
levels = np.array(["a", "b", "c"])
|
| 141 |
+
codes = np.array([1, 1, 2, 0, 0, 1, 1])
|
| 142 |
+
val = codes[0]
|
| 143 |
+
mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
|
| 144 |
+
assert mi.codes[0][0] == val
|
| 145 |
+
codes[0] = 15
|
| 146 |
+
assert mi.codes[0][0] == val
|
| 147 |
+
val = levels[0]
|
| 148 |
+
levels[0] = "PANDA"
|
| 149 |
+
assert mi.levels[0][0] == val
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
# ----------------------------------------------------------------------------
|
| 153 |
+
# from_arrays
|
| 154 |
+
# ----------------------------------------------------------------------------
|
| 155 |
+
def test_from_arrays(idx):
|
| 156 |
+
arrays = [
|
| 157 |
+
np.asarray(lev).take(level_codes)
|
| 158 |
+
for lev, level_codes in zip(idx.levels, idx.codes)
|
| 159 |
+
]
|
| 160 |
+
|
| 161 |
+
# list of arrays as input
|
| 162 |
+
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
| 163 |
+
tm.assert_index_equal(result, idx)
|
| 164 |
+
|
| 165 |
+
# infer correctly
|
| 166 |
+
result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
|
| 167 |
+
assert result.levels[0].equals(Index([Timestamp("20130101")]))
|
| 168 |
+
assert result.levels[1].equals(Index(["a", "b"]))
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def test_from_arrays_iterator(idx):
|
| 172 |
+
# GH 18434
|
| 173 |
+
arrays = [
|
| 174 |
+
np.asarray(lev).take(level_codes)
|
| 175 |
+
for lev, level_codes in zip(idx.levels, idx.codes)
|
| 176 |
+
]
|
| 177 |
+
|
| 178 |
+
# iterator as input
|
| 179 |
+
result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
|
| 180 |
+
tm.assert_index_equal(result, idx)
|
| 181 |
+
|
| 182 |
+
# invalid iterator input
|
| 183 |
+
msg = "Input must be a list / sequence of array-likes."
|
| 184 |
+
with pytest.raises(TypeError, match=msg):
|
| 185 |
+
MultiIndex.from_arrays(0)
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def test_from_arrays_tuples(idx):
|
| 189 |
+
arrays = tuple(
|
| 190 |
+
tuple(np.asarray(lev).take(level_codes))
|
| 191 |
+
for lev, level_codes in zip(idx.levels, idx.codes)
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
+
# tuple of tuples as input
|
| 195 |
+
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
| 196 |
+
tm.assert_index_equal(result, idx)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
@pytest.mark.parametrize(
|
| 200 |
+
("idx1", "idx2"),
|
| 201 |
+
[
|
| 202 |
+
(
|
| 203 |
+
pd.period_range("2011-01-01", freq="D", periods=3),
|
| 204 |
+
pd.period_range("2015-01-01", freq="h", periods=3),
|
| 205 |
+
),
|
| 206 |
+
(
|
| 207 |
+
date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
|
| 208 |
+
date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
|
| 209 |
+
),
|
| 210 |
+
(
|
| 211 |
+
pd.timedelta_range("1 days", freq="D", periods=3),
|
| 212 |
+
pd.timedelta_range("2 hours", freq="h", periods=3),
|
| 213 |
+
),
|
| 214 |
+
],
|
| 215 |
+
)
|
| 216 |
+
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
|
| 217 |
+
result = MultiIndex.from_arrays([idx1, idx2])
|
| 218 |
+
tm.assert_index_equal(result.get_level_values(0), idx1)
|
| 219 |
+
tm.assert_index_equal(result.get_level_values(1), idx2)
|
| 220 |
+
|
| 221 |
+
result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
|
| 222 |
+
tm.assert_index_equal(result2.get_level_values(0), idx1)
|
| 223 |
+
tm.assert_index_equal(result2.get_level_values(1), idx2)
|
| 224 |
+
|
| 225 |
+
tm.assert_index_equal(result, result2)
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
def test_from_arrays_index_datetimelike_mixed():
|
| 229 |
+
idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
|
| 230 |
+
idx2 = date_range("2015-01-01 10:00", freq="h", periods=3)
|
| 231 |
+
idx3 = pd.timedelta_range("1 days", freq="D", periods=3)
|
| 232 |
+
idx4 = pd.period_range("2011-01-01", freq="D", periods=3)
|
| 233 |
+
|
| 234 |
+
result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
|
| 235 |
+
tm.assert_index_equal(result.get_level_values(0), idx1)
|
| 236 |
+
tm.assert_index_equal(result.get_level_values(1), idx2)
|
| 237 |
+
tm.assert_index_equal(result.get_level_values(2), idx3)
|
| 238 |
+
tm.assert_index_equal(result.get_level_values(3), idx4)
|
| 239 |
+
|
| 240 |
+
result2 = MultiIndex.from_arrays(
|
| 241 |
+
[Series(idx1), Series(idx2), Series(idx3), Series(idx4)]
|
| 242 |
+
)
|
| 243 |
+
tm.assert_index_equal(result2.get_level_values(0), idx1)
|
| 244 |
+
tm.assert_index_equal(result2.get_level_values(1), idx2)
|
| 245 |
+
tm.assert_index_equal(result2.get_level_values(2), idx3)
|
| 246 |
+
tm.assert_index_equal(result2.get_level_values(3), idx4)
|
| 247 |
+
|
| 248 |
+
tm.assert_index_equal(result, result2)
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def test_from_arrays_index_series_categorical():
|
| 252 |
+
# GH13743
|
| 253 |
+
idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False)
|
| 254 |
+
idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True)
|
| 255 |
+
|
| 256 |
+
result = MultiIndex.from_arrays([idx1, idx2])
|
| 257 |
+
tm.assert_index_equal(result.get_level_values(0), idx1)
|
| 258 |
+
tm.assert_index_equal(result.get_level_values(1), idx2)
|
| 259 |
+
|
| 260 |
+
result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
|
| 261 |
+
tm.assert_index_equal(result2.get_level_values(0), idx1)
|
| 262 |
+
tm.assert_index_equal(result2.get_level_values(1), idx2)
|
| 263 |
+
|
| 264 |
+
result3 = MultiIndex.from_arrays([idx1.values, idx2.values])
|
| 265 |
+
tm.assert_index_equal(result3.get_level_values(0), idx1)
|
| 266 |
+
tm.assert_index_equal(result3.get_level_values(1), idx2)
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def test_from_arrays_empty():
|
| 270 |
+
# 0 levels
|
| 271 |
+
msg = "Must pass non-zero number of levels/codes"
|
| 272 |
+
with pytest.raises(ValueError, match=msg):
|
| 273 |
+
MultiIndex.from_arrays(arrays=[])
|
| 274 |
+
|
| 275 |
+
# 1 level
|
| 276 |
+
result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
|
| 277 |
+
assert isinstance(result, MultiIndex)
|
| 278 |
+
expected = Index([], name="A")
|
| 279 |
+
tm.assert_index_equal(result.levels[0], expected)
|
| 280 |
+
assert result.names == ["A"]
|
| 281 |
+
|
| 282 |
+
# N levels
|
| 283 |
+
for N in [2, 3]:
|
| 284 |
+
arrays = [[]] * N
|
| 285 |
+
names = list("ABC")[:N]
|
| 286 |
+
result = MultiIndex.from_arrays(arrays=arrays, names=names)
|
| 287 |
+
expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names)
|
| 288 |
+
tm.assert_index_equal(result, expected)
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
@pytest.mark.parametrize(
|
| 292 |
+
"invalid_sequence_of_arrays",
|
| 293 |
+
[
|
| 294 |
+
1,
|
| 295 |
+
[1],
|
| 296 |
+
[1, 2],
|
| 297 |
+
[[1], 2],
|
| 298 |
+
[1, [2]],
|
| 299 |
+
"a",
|
| 300 |
+
["a"],
|
| 301 |
+
["a", "b"],
|
| 302 |
+
[["a"], "b"],
|
| 303 |
+
(1,),
|
| 304 |
+
(1, 2),
|
| 305 |
+
([1], 2),
|
| 306 |
+
(1, [2]),
|
| 307 |
+
"a",
|
| 308 |
+
("a",),
|
| 309 |
+
("a", "b"),
|
| 310 |
+
(["a"], "b"),
|
| 311 |
+
[(1,), 2],
|
| 312 |
+
[1, (2,)],
|
| 313 |
+
[("a",), "b"],
|
| 314 |
+
((1,), 2),
|
| 315 |
+
(1, (2,)),
|
| 316 |
+
(("a",), "b"),
|
| 317 |
+
],
|
| 318 |
+
)
|
| 319 |
+
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
|
| 320 |
+
msg = "Input must be a list / sequence of array-likes"
|
| 321 |
+
with pytest.raises(TypeError, match=msg):
|
| 322 |
+
MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
@pytest.mark.parametrize(
|
| 326 |
+
"idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
|
| 327 |
+
)
|
| 328 |
+
def test_from_arrays_different_lengths(idx1, idx2):
|
| 329 |
+
# see gh-13599
|
| 330 |
+
msg = "^all arrays must be same length$"
|
| 331 |
+
with pytest.raises(ValueError, match=msg):
|
| 332 |
+
MultiIndex.from_arrays([idx1, idx2])
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
def test_from_arrays_respects_none_names():
|
| 336 |
+
# GH27292
|
| 337 |
+
a = Series([1, 2, 3], name="foo")
|
| 338 |
+
b = Series(["a", "b", "c"], name="bar")
|
| 339 |
+
|
| 340 |
+
result = MultiIndex.from_arrays([a, b], names=None)
|
| 341 |
+
expected = MultiIndex(
|
| 342 |
+
levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None
|
| 343 |
+
)
|
| 344 |
+
|
| 345 |
+
tm.assert_index_equal(result, expected)
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
# ----------------------------------------------------------------------------
|
| 349 |
+
# from_tuples
|
| 350 |
+
# ----------------------------------------------------------------------------
|
| 351 |
+
def test_from_tuples():
|
| 352 |
+
msg = "Cannot infer number of levels from empty list"
|
| 353 |
+
with pytest.raises(TypeError, match=msg):
|
| 354 |
+
MultiIndex.from_tuples([])
|
| 355 |
+
|
| 356 |
+
expected = MultiIndex(
|
| 357 |
+
levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
|
| 358 |
+
)
|
| 359 |
+
|
| 360 |
+
# input tuples
|
| 361 |
+
result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
|
| 362 |
+
tm.assert_index_equal(result, expected)
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
def test_from_tuples_iterator():
|
| 366 |
+
# GH 18434
|
| 367 |
+
# input iterator for tuples
|
| 368 |
+
expected = MultiIndex(
|
| 369 |
+
levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
|
| 370 |
+
)
|
| 371 |
+
|
| 372 |
+
result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"])
|
| 373 |
+
tm.assert_index_equal(result, expected)
|
| 374 |
+
|
| 375 |
+
# input non-iterables
|
| 376 |
+
msg = "Input must be a list / sequence of tuple-likes."
|
| 377 |
+
with pytest.raises(TypeError, match=msg):
|
| 378 |
+
MultiIndex.from_tuples(0)
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
def test_from_tuples_empty():
|
| 382 |
+
# GH 16777
|
| 383 |
+
result = MultiIndex.from_tuples([], names=["a", "b"])
|
| 384 |
+
expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
|
| 385 |
+
tm.assert_index_equal(result, expected)
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
def test_from_tuples_index_values(idx):
|
| 389 |
+
result = MultiIndex.from_tuples(idx)
|
| 390 |
+
assert (result.values == idx.values).all()
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def test_tuples_with_name_string():
|
| 394 |
+
# GH 15110 and GH 14848
|
| 395 |
+
|
| 396 |
+
li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
|
| 397 |
+
msg = "Names should be list-like for a MultiIndex"
|
| 398 |
+
with pytest.raises(ValueError, match=msg):
|
| 399 |
+
Index(li, name="abc")
|
| 400 |
+
with pytest.raises(ValueError, match=msg):
|
| 401 |
+
Index(li, name="a")
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
def test_from_tuples_with_tuple_label():
|
| 405 |
+
# GH 15457
|
| 406 |
+
expected = pd.DataFrame(
|
| 407 |
+
[[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
|
| 408 |
+
).set_index(["a", "b"])
|
| 409 |
+
idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
|
| 410 |
+
result = pd.DataFrame([2, 3], columns=["c"], index=idx)
|
| 411 |
+
tm.assert_frame_equal(expected, result)
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
# ----------------------------------------------------------------------------
|
| 415 |
+
# from_product
|
| 416 |
+
# ----------------------------------------------------------------------------
|
| 417 |
+
def test_from_product_empty_zero_levels():
|
| 418 |
+
# 0 levels
|
| 419 |
+
msg = "Must pass non-zero number of levels/codes"
|
| 420 |
+
with pytest.raises(ValueError, match=msg):
|
| 421 |
+
MultiIndex.from_product([])
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def test_from_product_empty_one_level():
|
| 425 |
+
result = MultiIndex.from_product([[]], names=["A"])
|
| 426 |
+
expected = Index([], name="A")
|
| 427 |
+
tm.assert_index_equal(result.levels[0], expected)
|
| 428 |
+
assert result.names == ["A"]
|
| 429 |
+
|
| 430 |
+
|
| 431 |
+
@pytest.mark.parametrize(
|
| 432 |
+
"first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
|
| 433 |
+
)
|
| 434 |
+
def test_from_product_empty_two_levels(first, second):
|
| 435 |
+
names = ["A", "B"]
|
| 436 |
+
result = MultiIndex.from_product([first, second], names=names)
|
| 437 |
+
expected = MultiIndex(levels=[first, second], codes=[[], []], names=names)
|
| 438 |
+
tm.assert_index_equal(result, expected)
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
@pytest.mark.parametrize("N", list(range(4)))
|
| 442 |
+
def test_from_product_empty_three_levels(N):
|
| 443 |
+
# GH12258
|
| 444 |
+
names = ["A", "B", "C"]
|
| 445 |
+
lvl2 = list(range(N))
|
| 446 |
+
result = MultiIndex.from_product([[], lvl2, []], names=names)
|
| 447 |
+
expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names)
|
| 448 |
+
tm.assert_index_equal(result, expected)
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
@pytest.mark.parametrize(
|
| 452 |
+
"invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
|
| 453 |
+
)
|
| 454 |
+
def test_from_product_invalid_input(invalid_input):
|
| 455 |
+
msg = r"Input must be a list / sequence of iterables|Input must be list-like"
|
| 456 |
+
with pytest.raises(TypeError, match=msg):
|
| 457 |
+
MultiIndex.from_product(iterables=invalid_input)
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
def test_from_product_datetimeindex():
|
| 461 |
+
dt_index = date_range("2000-01-01", periods=2)
|
| 462 |
+
mi = MultiIndex.from_product([[1, 2], dt_index])
|
| 463 |
+
etalon = construct_1d_object_array_from_listlike(
|
| 464 |
+
[
|
| 465 |
+
(1, Timestamp("2000-01-01")),
|
| 466 |
+
(1, Timestamp("2000-01-02")),
|
| 467 |
+
(2, Timestamp("2000-01-01")),
|
| 468 |
+
(2, Timestamp("2000-01-02")),
|
| 469 |
+
]
|
| 470 |
+
)
|
| 471 |
+
tm.assert_numpy_array_equal(mi.values, etalon)
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
def test_from_product_rangeindex():
|
| 475 |
+
# RangeIndex is preserved by factorize, so preserved in levels
|
| 476 |
+
rng = Index(range(5))
|
| 477 |
+
other = ["a", "b"]
|
| 478 |
+
mi = MultiIndex.from_product([rng, other])
|
| 479 |
+
tm.assert_index_equal(mi._levels[0], rng, exact=True)
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
@pytest.mark.parametrize("ordered", [False, True])
|
| 483 |
+
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
|
| 484 |
+
def test_from_product_index_series_categorical(ordered, f):
|
| 485 |
+
# GH13743
|
| 486 |
+
first = ["foo", "bar"]
|
| 487 |
+
|
| 488 |
+
idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered)
|
| 489 |
+
expected = pd.CategoricalIndex(
|
| 490 |
+
list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered
|
| 491 |
+
)
|
| 492 |
+
|
| 493 |
+
result = MultiIndex.from_product([first, f(idx)])
|
| 494 |
+
tm.assert_index_equal(result.get_level_values(1), expected)
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
def test_from_product():
|
| 498 |
+
first = ["foo", "bar", "buz"]
|
| 499 |
+
second = ["a", "b", "c"]
|
| 500 |
+
names = ["first", "second"]
|
| 501 |
+
result = MultiIndex.from_product([first, second], names=names)
|
| 502 |
+
|
| 503 |
+
tuples = [
|
| 504 |
+
("foo", "a"),
|
| 505 |
+
("foo", "b"),
|
| 506 |
+
("foo", "c"),
|
| 507 |
+
("bar", "a"),
|
| 508 |
+
("bar", "b"),
|
| 509 |
+
("bar", "c"),
|
| 510 |
+
("buz", "a"),
|
| 511 |
+
("buz", "b"),
|
| 512 |
+
("buz", "c"),
|
| 513 |
+
]
|
| 514 |
+
expected = MultiIndex.from_tuples(tuples, names=names)
|
| 515 |
+
|
| 516 |
+
tm.assert_index_equal(result, expected)
|
| 517 |
+
|
| 518 |
+
|
| 519 |
+
def test_from_product_iterator():
|
| 520 |
+
# GH 18434
|
| 521 |
+
first = ["foo", "bar", "buz"]
|
| 522 |
+
second = ["a", "b", "c"]
|
| 523 |
+
names = ["first", "second"]
|
| 524 |
+
tuples = [
|
| 525 |
+
("foo", "a"),
|
| 526 |
+
("foo", "b"),
|
| 527 |
+
("foo", "c"),
|
| 528 |
+
("bar", "a"),
|
| 529 |
+
("bar", "b"),
|
| 530 |
+
("bar", "c"),
|
| 531 |
+
("buz", "a"),
|
| 532 |
+
("buz", "b"),
|
| 533 |
+
("buz", "c"),
|
| 534 |
+
]
|
| 535 |
+
expected = MultiIndex.from_tuples(tuples, names=names)
|
| 536 |
+
|
| 537 |
+
# iterator as input
|
| 538 |
+
result = MultiIndex.from_product(iter([first, second]), names=names)
|
| 539 |
+
tm.assert_index_equal(result, expected)
|
| 540 |
+
|
| 541 |
+
# Invalid non-iterable input
|
| 542 |
+
msg = "Input must be a list / sequence of iterables."
|
| 543 |
+
with pytest.raises(TypeError, match=msg):
|
| 544 |
+
MultiIndex.from_product(0)
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
@pytest.mark.parametrize(
|
| 548 |
+
"a, b, expected_names",
|
| 549 |
+
[
|
| 550 |
+
(
|
| 551 |
+
Series([1, 2, 3], name="foo"),
|
| 552 |
+
Series(["a", "b"], name="bar"),
|
| 553 |
+
["foo", "bar"],
|
| 554 |
+
),
|
| 555 |
+
(Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
|
| 556 |
+
([1, 2, 3], ["a", "b"], None),
|
| 557 |
+
],
|
| 558 |
+
)
|
| 559 |
+
def test_from_product_infer_names(a, b, expected_names):
|
| 560 |
+
# GH27292
|
| 561 |
+
result = MultiIndex.from_product([a, b])
|
| 562 |
+
expected = MultiIndex(
|
| 563 |
+
levels=[[1, 2, 3], ["a", "b"]],
|
| 564 |
+
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
|
| 565 |
+
names=expected_names,
|
| 566 |
+
)
|
| 567 |
+
tm.assert_index_equal(result, expected)
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
def test_from_product_respects_none_names():
|
| 571 |
+
# GH27292
|
| 572 |
+
a = Series([1, 2, 3], name="foo")
|
| 573 |
+
b = Series(["a", "b"], name="bar")
|
| 574 |
+
|
| 575 |
+
result = MultiIndex.from_product([a, b], names=None)
|
| 576 |
+
expected = MultiIndex(
|
| 577 |
+
levels=[[1, 2, 3], ["a", "b"]],
|
| 578 |
+
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
|
| 579 |
+
names=None,
|
| 580 |
+
)
|
| 581 |
+
tm.assert_index_equal(result, expected)
|
| 582 |
+
|
| 583 |
+
|
| 584 |
+
def test_from_product_readonly():
|
| 585 |
+
# GH#15286 passing read-only array to from_product
|
| 586 |
+
a = np.array(range(3))
|
| 587 |
+
b = ["a", "b"]
|
| 588 |
+
expected = MultiIndex.from_product([a, b])
|
| 589 |
+
|
| 590 |
+
a.setflags(write=False)
|
| 591 |
+
result = MultiIndex.from_product([a, b])
|
| 592 |
+
tm.assert_index_equal(result, expected)
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
def test_create_index_existing_name(idx):
|
| 596 |
+
# GH11193, when an existing index is passed, and a new name is not
|
| 597 |
+
# specified, the new index should inherit the previous object name
|
| 598 |
+
index = idx
|
| 599 |
+
index.names = ["foo", "bar"]
|
| 600 |
+
result = Index(index)
|
| 601 |
+
expected = Index(
|
| 602 |
+
Index(
|
| 603 |
+
[
|
| 604 |
+
("foo", "one"),
|
| 605 |
+
("foo", "two"),
|
| 606 |
+
("bar", "one"),
|
| 607 |
+
("baz", "two"),
|
| 608 |
+
("qux", "one"),
|
| 609 |
+
("qux", "two"),
|
| 610 |
+
],
|
| 611 |
+
dtype="object",
|
| 612 |
+
)
|
| 613 |
+
)
|
| 614 |
+
tm.assert_index_equal(result, expected)
|
| 615 |
+
|
| 616 |
+
result = Index(index, name="A")
|
| 617 |
+
expected = Index(
|
| 618 |
+
Index(
|
| 619 |
+
[
|
| 620 |
+
("foo", "one"),
|
| 621 |
+
("foo", "two"),
|
| 622 |
+
("bar", "one"),
|
| 623 |
+
("baz", "two"),
|
| 624 |
+
("qux", "one"),
|
| 625 |
+
("qux", "two"),
|
| 626 |
+
],
|
| 627 |
+
dtype="object",
|
| 628 |
+
),
|
| 629 |
+
name="A",
|
| 630 |
+
)
|
| 631 |
+
tm.assert_index_equal(result, expected)
|
| 632 |
+
|
| 633 |
+
|
| 634 |
+
# ----------------------------------------------------------------------------
|
| 635 |
+
# from_frame
|
| 636 |
+
# ----------------------------------------------------------------------------
|
| 637 |
+
def test_from_frame():
|
| 638 |
+
# GH 22420
|
| 639 |
+
df = pd.DataFrame(
|
| 640 |
+
[["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"]
|
| 641 |
+
)
|
| 642 |
+
expected = MultiIndex.from_tuples(
|
| 643 |
+
[("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
|
| 644 |
+
)
|
| 645 |
+
result = MultiIndex.from_frame(df)
|
| 646 |
+
tm.assert_index_equal(expected, result)
|
| 647 |
+
|
| 648 |
+
|
| 649 |
+
def test_from_frame_missing_values_multiIndex():
|
| 650 |
+
# GH 39984
|
| 651 |
+
pa = pytest.importorskip("pyarrow")
|
| 652 |
+
|
| 653 |
+
df = pd.DataFrame(
|
| 654 |
+
{
|
| 655 |
+
"a": Series([1, 2, None], dtype="Int64"),
|
| 656 |
+
"b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
|
| 657 |
+
}
|
| 658 |
+
)
|
| 659 |
+
multi_indexed = MultiIndex.from_frame(df)
|
| 660 |
+
expected = MultiIndex.from_arrays(
|
| 661 |
+
[
|
| 662 |
+
Series([1, 2, None]).astype("Int64"),
|
| 663 |
+
pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
|
| 664 |
+
],
|
| 665 |
+
names=["a", "b"],
|
| 666 |
+
)
|
| 667 |
+
tm.assert_index_equal(multi_indexed, expected)
|
| 668 |
+
|
| 669 |
+
|
| 670 |
+
@pytest.mark.parametrize(
|
| 671 |
+
"non_frame",
|
| 672 |
+
[
|
| 673 |
+
Series([1, 2, 3, 4]),
|
| 674 |
+
[1, 2, 3, 4],
|
| 675 |
+
[[1, 2], [3, 4], [5, 6]],
|
| 676 |
+
Index([1, 2, 3, 4]),
|
| 677 |
+
np.array([[1, 2], [3, 4], [5, 6]]),
|
| 678 |
+
27,
|
| 679 |
+
],
|
| 680 |
+
)
|
| 681 |
+
def test_from_frame_error(non_frame):
|
| 682 |
+
# GH 22420
|
| 683 |
+
with pytest.raises(TypeError, match="Input must be a DataFrame"):
|
| 684 |
+
MultiIndex.from_frame(non_frame)
|
| 685 |
+
|
| 686 |
+
|
| 687 |
+
def test_from_frame_dtype_fidelity():
|
| 688 |
+
# GH 22420
|
| 689 |
+
df = pd.DataFrame(
|
| 690 |
+
{
|
| 691 |
+
"dates": date_range("19910905", periods=6, tz="US/Eastern"),
|
| 692 |
+
"a": [1, 1, 1, 2, 2, 2],
|
| 693 |
+
"b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
|
| 694 |
+
"c": ["x", "x", "y", "z", "x", "y"],
|
| 695 |
+
}
|
| 696 |
+
)
|
| 697 |
+
original_dtypes = df.dtypes.to_dict()
|
| 698 |
+
|
| 699 |
+
expected_mi = MultiIndex.from_arrays(
|
| 700 |
+
[
|
| 701 |
+
date_range("19910905", periods=6, tz="US/Eastern"),
|
| 702 |
+
[1, 1, 1, 2, 2, 2],
|
| 703 |
+
pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
|
| 704 |
+
["x", "x", "y", "z", "x", "y"],
|
| 705 |
+
],
|
| 706 |
+
names=["dates", "a", "b", "c"],
|
| 707 |
+
)
|
| 708 |
+
mi = MultiIndex.from_frame(df)
|
| 709 |
+
mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
|
| 710 |
+
|
| 711 |
+
tm.assert_index_equal(expected_mi, mi)
|
| 712 |
+
assert original_dtypes == mi_dtypes
|
| 713 |
+
|
| 714 |
+
|
| 715 |
+
@pytest.mark.parametrize(
|
| 716 |
+
"names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
|
| 717 |
+
)
|
| 718 |
+
def test_from_frame_valid_names(names_in, names_out):
|
| 719 |
+
# GH 22420
|
| 720 |
+
df = pd.DataFrame(
|
| 721 |
+
[["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
|
| 722 |
+
columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
|
| 723 |
+
)
|
| 724 |
+
mi = MultiIndex.from_frame(df, names=names_in)
|
| 725 |
+
assert mi.names == names_out
|
| 726 |
+
|
| 727 |
+
|
| 728 |
+
@pytest.mark.parametrize(
|
| 729 |
+
"names,expected_error_msg",
|
| 730 |
+
[
|
| 731 |
+
("bad_input", "Names should be list-like for a MultiIndex"),
|
| 732 |
+
(["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
|
| 733 |
+
],
|
| 734 |
+
)
|
| 735 |
+
def test_from_frame_invalid_names(names, expected_error_msg):
|
| 736 |
+
# GH 22420
|
| 737 |
+
df = pd.DataFrame(
|
| 738 |
+
[["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
|
| 739 |
+
columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
|
| 740 |
+
)
|
| 741 |
+
with pytest.raises(ValueError, match=expected_error_msg):
|
| 742 |
+
MultiIndex.from_frame(df, names=names)
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
def test_index_equal_empty_iterable():
|
| 746 |
+
# #16844
|
| 747 |
+
a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
|
| 748 |
+
b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
|
| 749 |
+
tm.assert_index_equal(a, b)
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
def test_raise_invalid_sortorder():
|
| 753 |
+
# Test that the MultiIndex constructor raise when a incorrect sortorder is given
|
| 754 |
+
# GH#28518
|
| 755 |
+
|
| 756 |
+
levels = [[0, 1], [0, 1, 2]]
|
| 757 |
+
|
| 758 |
+
# Correct sortorder
|
| 759 |
+
MultiIndex(
|
| 760 |
+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
|
| 761 |
+
)
|
| 762 |
+
|
| 763 |
+
with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
|
| 764 |
+
MultiIndex(
|
| 765 |
+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2
|
| 766 |
+
)
|
| 767 |
+
|
| 768 |
+
with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
|
| 769 |
+
MultiIndex(
|
| 770 |
+
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1
|
| 771 |
+
)
|
| 772 |
+
|
| 773 |
+
|
| 774 |
+
def test_datetimeindex():
|
| 775 |
+
idx1 = pd.DatetimeIndex(
|
| 776 |
+
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo"
|
| 777 |
+
)
|
| 778 |
+
idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern")
|
| 779 |
+
idx = MultiIndex.from_arrays([idx1, idx2])
|
| 780 |
+
|
| 781 |
+
expected1 = pd.DatetimeIndex(
|
| 782 |
+
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo"
|
| 783 |
+
)
|
| 784 |
+
|
| 785 |
+
tm.assert_index_equal(idx.levels[0], expected1)
|
| 786 |
+
tm.assert_index_equal(idx.levels[1], idx2)
|
| 787 |
+
|
| 788 |
+
# from datetime combos
|
| 789 |
+
# GH 7888
|
| 790 |
+
date1 = np.datetime64("today")
|
| 791 |
+
date2 = datetime.today()
|
| 792 |
+
date3 = Timestamp.today()
|
| 793 |
+
|
| 794 |
+
for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]):
|
| 795 |
+
index = MultiIndex.from_product([[d1], [d2]])
|
| 796 |
+
assert isinstance(index.levels[0], pd.DatetimeIndex)
|
| 797 |
+
assert isinstance(index.levels[1], pd.DatetimeIndex)
|
| 798 |
+
|
| 799 |
+
# but NOT date objects, matching Index behavior
|
| 800 |
+
date4 = date.today()
|
| 801 |
+
index = MultiIndex.from_product([[date4], [date2]])
|
| 802 |
+
assert not isinstance(index.levels[0], pd.DatetimeIndex)
|
| 803 |
+
assert isinstance(index.levels[1], pd.DatetimeIndex)
|
| 804 |
+
|
| 805 |
+
|
| 806 |
+
def test_constructor_with_tz():
|
| 807 |
+
index = pd.DatetimeIndex(
|
| 808 |
+
["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
|
| 809 |
+
)
|
| 810 |
+
columns = pd.DatetimeIndex(
|
| 811 |
+
["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
|
| 812 |
+
)
|
| 813 |
+
|
| 814 |
+
result = MultiIndex.from_arrays([index, columns])
|
| 815 |
+
|
| 816 |
+
assert result.names == ["dt1", "dt2"]
|
| 817 |
+
tm.assert_index_equal(result.levels[0], index)
|
| 818 |
+
tm.assert_index_equal(result.levels[1], columns)
|
| 819 |
+
|
| 820 |
+
result = MultiIndex.from_arrays([Series(index), Series(columns)])
|
| 821 |
+
|
| 822 |
+
assert result.names == ["dt1", "dt2"]
|
| 823 |
+
tm.assert_index_equal(result.levels[0], index)
|
| 824 |
+
tm.assert_index_equal(result.levels[1], columns)
|
| 825 |
+
|
| 826 |
+
|
| 827 |
+
def test_multiindex_inference_consistency():
    """Check that MultiIndex level inference matches plain ``Index`` inference.

    A list of ``datetime.date`` objects yields an object-dtype ``Index``; the
    single level built by each MultiIndex constructor must be object dtype too.
    """
    # check that inference behavior matches the base class
    v = date.today()
    arr = [v, v]

    idx = Index(arr)
    assert idx.dtype == object

    constructed = (
        MultiIndex.from_arrays([arr]),
        MultiIndex.from_product([arr]),
        MultiIndex.from_tuples([(x,) for x in arr]),
    )
    for mi in constructed:
        lev = mi.levels[0]
        assert lev.dtype == object
|
| 848 |
+
|
| 849 |
+
|
| 850 |
+
def test_dtype_representation(using_infer_string):
    """Check ``MultiIndex.dtypes`` when the level names are tuples.

    The expected dtype of the string level depends on the
    ``using_infer_string`` fixture value.
    """
    # GH#46900
    pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
    result = pmidx.dtypes
    exp = pd.StringDtype(na_value=np.nan) if using_infer_string else "object"
    expected = Series(
        ["int64", exp],
        index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_conversion.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.compat.numpy import np_version_gt2
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import (
|
| 8 |
+
DataFrame,
|
| 9 |
+
MultiIndex,
|
| 10 |
+
)
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test_to_numpy(idx):
    """Check that ``idx.to_numpy()`` equals ``idx.values``."""
    result = idx.to_numpy()
    exp = idx.values
    tm.assert_numpy_array_equal(result, exp)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def test_array_interface(idx):
    """Check NumPy array conversion of a MultiIndex and its ``copy`` handling.

    Covers the converted values, memory sharing between repeated
    ``np.asarray`` calls, real copies with ``copy=True`` and, on NumPy >= 2,
    the ``FutureWarning`` raised for ``copy=False``.
    """
    # https://github.com/pandas-dev/pandas/pull/60046
    result = np.asarray(idx)
    expected = np.empty((6,), dtype=object)
    expected[:] = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]
    tm.assert_numpy_array_equal(result, expected)

    # it always gives a copy by default, but the values are cached, so results
    # are still sharing memory
    result_copy1 = np.asarray(idx)
    result_copy2 = np.asarray(idx)
    assert np.may_share_memory(result_copy1, result_copy2)

    # with explicit copy=True, then it is an actual copy
    result_copy1 = np.array(idx, copy=True)
    result_copy2 = np.array(idx, copy=True)
    assert not np.may_share_memory(result_copy1, result_copy2)

    if not np_version_gt2:
        # copy=False semantics are only supported in NumPy>=2.
        return

    # for MultiIndex, copy=False is never allowed
    msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        np.array(idx, copy=False)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def test_to_frame():
    """Check ``MultiIndex.to_frame`` for unnamed, named and datetime levels.

    Covers ``index=False`` and the default index, explicit ``name=`` overrides
    (GH-22580), and the errors raised for a non-list ``name`` and a ``name``
    of the wrong length.
    """
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]

    # Unnamed levels: columns fall back to the default labels.
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # Named levels: the names become the column labels.
    index = MultiIndex.from_tuples(tuples, names=["first", "second"])
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    msg = "'name' must be a list / sequence of column names."
    with pytest.raises(TypeError, match=msg):
        index.to_frame(name="first")

    msg = "'name' should have same length as number of levels on index."
    with pytest.raises(ValueError, match=msg):
        index.to_frame(name=["first"])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    result = index.to_frame(index=False)
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    tm.assert_frame_equal(result, expected)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def test_to_frame_dtype_fidelity():
    """Check that ``to_frame`` keeps each level's dtype in its column."""
    # GH 22420
    mi = MultiIndex.from_arrays(
        [
            pd.date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    # Map each level name to the dtype of that level.
    original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}

    expected_df = DataFrame(
        {
            "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    df = mi.to_frame(index=False)
    df_dtypes = df.dtypes.to_dict()

    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df_dtypes
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def test_to_frame_resulting_column_order():
    """Check that ``to_frame`` columns follow the order of the level names."""
    # GH 22420
    expected = ["z", 0, "a"]
    mi = MultiIndex.from_arrays(
        [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected
    )
    result = mi.to_frame().columns.tolist()
    assert result == expected
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def test_to_frame_duplicate_labels():
    """Check ``to_frame`` with level names that produce duplicate columns.

    Without ``allow_duplicates=True`` a ``ValueError`` is raised; with it the
    frame is built with the duplicated labels.
    """
    # GH 45245
    data = [(1, 2), (3, 4)]
    msg = "Cannot create duplicate column labels"

    # Two levels that share the same explicit name.
    names = ["a", "a"]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match=msg):
        index.to_frame()

    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=names)
    tm.assert_frame_equal(result, expected)

    # An unnamed level next to a level named 0: the expected columns are
    # [0, 0], so the labels collide.
    names = [None, 0]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match=msg):
        index.to_frame()

    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=[0, 0])
    tm.assert_frame_equal(result, expected)
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def test_to_flat_index(idx):
    """Check that ``to_flat_index`` returns an ``Index`` of the row tuples."""
    expected = pd.Index(
        (
            ("foo", "one"),
            ("foo", "two"),
            ("bar", "one"),
            ("baz", "two"),
            ("qux", "one"),
            ("qux", "two"),
        ),
        tupleize_cols=False,
    )
    result = idx.to_flat_index()
    tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_copy.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from copy import (
|
| 2 |
+
copy,
|
| 3 |
+
deepcopy,
|
| 4 |
+
)
|
| 5 |
+
|
| 6 |
+
import pytest
|
| 7 |
+
|
| 8 |
+
from pandas import MultiIndex
|
| 9 |
+
import pandas._testing as tm
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` is a proper copy of the MultiIndex ``original``.

    Checks levels via ``tm.assert_copy``, that codes and names are equal but
    not the same objects, and that ``sortorder`` matches.
    """
    # Levels should be (at least, shallow copied)
    tm.assert_copy(copy.levels, original.levels)

    # Labels doesn't matter which way copied
    # (the equality check used to be performed twice; once is enough)
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names doesn't matter which way copied
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def test_copy(idx):
    """Check that ``idx.copy()`` satisfies ``assert_multiindex_copied``."""
    i_copy = idx.copy()

    assert_multiindex_copied(i_copy, idx)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def test_shallow_copy(idx):
    """Check that ``idx._view()`` satisfies ``assert_multiindex_copied``."""
    i_copy = idx._view()

    assert_multiindex_copied(i_copy, idx)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def test_view(idx):
    """Check that ``idx.view()`` satisfies ``assert_multiindex_copied``."""
    i_view = idx.view()
    assert_multiindex_copied(i_view, idx)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """Check ``copy.copy`` / ``copy.deepcopy`` return a new, equal MultiIndex."""
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = func(idx)
    assert idx_copy is not idx
    assert idx_copy.equals(idx)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """Check ``MultiIndex.copy(deep=...)`` returns an index equal to the original."""
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(deep=deep)
    assert idx_copy.equals(idx)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """Check that a keyword passed to ``MultiIndex.copy`` is set on the result."""
    # gh-12309: Check that the "name" argument as well other kwargs are honored
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(**{kwarg: value, "deep": deep})
    assert getattr(idx_copy, kwarg) == value
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def test_copy_deep_false_retains_id():
    """Check that ``copy(deep=False)`` keeps the same ``_id`` object."""
    # GH#47878
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )

    res = idx.copy(deep=False)
    assert res._id is idx._id
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_drop.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.errors import PerformanceWarning
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import (
|
| 8 |
+
Index,
|
| 9 |
+
MultiIndex,
|
| 10 |
+
)
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test_drop(idx):
    """Check ``MultiIndex.drop`` with full tuples, partial keys and mixtures.

    Covers dropping by list of tuples and by MultiIndex, dropping first-level
    labels, the ``KeyError`` raised for missing keys, and
    ``errors="ignore"``.
    """
    dropped = idx.drop([("foo", "two"), ("qux", "one")])

    index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
    dropped2 = idx.drop(index)

    expected = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
    tm.assert_index_equal(dropped2, expected)

    dropped = idx.drop(["bar"])
    expected = idx[[0, 1, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop("foo")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    index = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(index)
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(["foo", "two"])

    # partially correct argument
    mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(mixed_index)

    # error='ignore'
    dropped = idx.drop(index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(["foo", "two"], errors="ignore")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop
    dropped = idx.drop(["foo", ("qux", "one")])
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop / error='ignore'
    mixed_index = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(mixed_index)
    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def test_droplevel_with_names(idx):
    """Check that ``droplevel`` keeps the names of the remaining levels.

    Levels are dropped both by position and by name.
    """
    index = idx[idx.get_loc("foo")]
    dropped = index.droplevel(0)
    assert dropped.name == "second"

    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )
    dropped = index.droplevel(0)
    assert dropped.names == ("two", "three")

    # Dropping by name must match dropping by position.
    dropped = index.droplevel("two")
    expected = index.droplevel(1)
    assert dropped.equals(expected)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def test_droplevel_list():
    """Check ``droplevel`` with a list of level names.

    Covers a multi-name list, an empty list, the ``ValueError`` when every
    level would be removed, and the ``KeyError`` for an unknown level name.
    """
    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )

    dropped = index[:2].droplevel(["three", "one"])
    expected = index[:2].droplevel(2).droplevel(0)
    assert dropped.equals(expected)

    # An empty list drops nothing.
    dropped = index[:2].droplevel([])
    expected = index[:2]
    assert dropped.equals(expected)

    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        index[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        index[:2].droplevel(["one", "four"])
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def test_drop_not_lexsorted():
    """Check ``drop`` gives equal results on lexsorted and non-lexsorted indexes.

    The drop is expected to emit a ``PerformanceWarning``.
    """
    # GH 12078

    # define the lexsorted version of the multi-index
    tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
    lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
    assert lexsorted_mi._is_lexsorted()

    # and the not-lexsorted version
    df = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    df = df.pivot_table(index="a", columns=["b", "c"], values="d")
    df = df.reset_index()
    not_lexsorted_mi = df.columns
    assert not not_lexsorted_mi._is_lexsorted()

    # compare the results
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def test_drop_with_nan_in_index(nulls_fixture):
    """Check ``drop`` raises ``KeyError`` for a label missing from a level.

    The "date" level holds only the null value from ``nulls_fixture``.
    """
    # GH#18853
    mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
    msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(pd.Timestamp("2001"), level="date")
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_drop_with_non_monotonic_duplicates():
    """Check ``drop`` removes every occurrence of a duplicated, unsorted key."""
    # GH#33494
    mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
    result = mi.drop((1, 2))
    expected = MultiIndex.from_tuples([(2, 3)])
    tm.assert_index_equal(result, expected)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def test_single_level_drop_partially_missing_elements():
    """Check ``drop(..., level=0)`` raises when any requested label is missing.

    The error message is expected to name only the missing label.
    """
    # GH 37820

    mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])
    msg = r"labels \[4\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(4, level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([1, 4], level=0)
    msg = r"labels \[nan\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan], level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, 2, 3], level=0)

    # Here NaN is present in level 0, so only "a" is reported as missing.
    mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
    msg = r"labels \['a'\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, "a"], level=0)
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def test_droplevel_multiindex_one_level():
    """Check ``droplevel([])`` on a one-level MultiIndex returns a flat ``Index``."""
    # GH#37208
    index = MultiIndex.from_tuples([(2,)], names=("b",))
    result = index.droplevel([])
    expected = Index([2], name="b")
    tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_equivalence.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.core.dtypes.common import is_any_real_numeric_dtype
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import (
|
| 8 |
+
Index,
|
| 9 |
+
MultiIndex,
|
| 10 |
+
Series,
|
| 11 |
+
)
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def test_equals(idx):
    """Check ``equals`` against copies, casts, flat indexes and non-Index inputs."""
    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(idx.astype(object))
    assert idx.equals(idx.to_flat_index())
    assert idx.equals(idx.to_flat_index().astype("category"))

    # Plain lists and ndarrays are not considered equal.
    assert not idx.equals(list(idx))
    assert not idx.equals(np.array(idx))

    same_values = Index(idx, dtype=object)
    assert idx.equals(same_values)
    assert same_values.equals(idx)

    if idx.nlevels == 1:
        # do not test MultiIndex
        assert not idx.equals(Series(idx))
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_equals_op(idx):
    """Check elementwise ``==`` of an index against indexes, arrays and Series.

    Operands of a different length must raise ``ValueError``; equal-length
    operands must give the expected boolean array.
    """
    # GH9947, GH10637
    index_a = idx
    length_msg = "Lengths must match"

    n = len(index_a)
    index_b = index_a[0:-1]
    index_c = index_a[0:-1].append(index_a[-2:-1])
    index_d = index_a[0:1]
    with pytest.raises(ValueError, match=length_msg):
        index_a == index_b
    expected1 = np.array([True] * n)
    expected2 = np.array([True] * (n - 1) + [False])
    tm.assert_numpy_array_equal(index_a == index_a, expected1)
    tm.assert_numpy_array_equal(index_a == index_c, expected2)

    # test comparisons with numpy arrays
    array_a = np.array(index_a)
    array_b = np.array(index_a[0:-1])
    array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
    array_d = np.array(index_a[0:1])
    with pytest.raises(ValueError, match=length_msg):
        index_a == array_b
    tm.assert_numpy_array_equal(index_a == array_a, expected1)
    tm.assert_numpy_array_equal(index_a == array_c, expected2)

    # test comparisons with Series
    series_a = Series(array_a)
    series_b = Series(array_b)
    series_c = Series(array_c)
    series_d = Series(array_d)
    with pytest.raises(ValueError, match=length_msg):
        index_a == series_b

    tm.assert_numpy_array_equal(index_a == series_a, expected1)
    tm.assert_numpy_array_equal(index_a == series_c, expected2)

    # cases where length is 1 for one of them
    with pytest.raises(ValueError, match=length_msg):
        index_a == index_d
    with pytest.raises(ValueError, match=length_msg):
        index_a == series_d
    with pytest.raises(ValueError, match=length_msg):
        index_a == array_d
    msg = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=msg):
        series_a == series_d
    with pytest.raises(ValueError, match=length_msg):
        series_a == array_d

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if not isinstance(index_a, MultiIndex):
        expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
        # assuming the 2nd to last item is unique in the data
        item = index_a[-2]
        tm.assert_numpy_array_equal(index_a == item, expected3)
        tm.assert_series_equal(series_a == item, Series(expected3))
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def test_compare_tuple():
    """Check all six comparison operators between a MultiIndex and one tuple.

    The tuple is the first (smallest) entry of the index.
    """
    # GH#21517
    mi = MultiIndex.from_product([[1, 2]] * 2)

    all_false = np.array([False, False, False, False])

    result = mi == mi[0]
    expected = np.array([True, False, False, False])
    tm.assert_numpy_array_equal(result, expected)

    result = mi != mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi < mi[0]
    tm.assert_numpy_array_equal(result, all_false)

    result = mi <= mi[0]
    tm.assert_numpy_array_equal(result, expected)

    result = mi > mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi >= mi[0]
    tm.assert_numpy_array_equal(result, ~all_false)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def test_compare_tuple_strs():
    """Check ``==`` between a string MultiIndex and a tuple of strings.

    A full-length tuple matches its row; a shorter tuple matches nothing.
    """
    # GH#34180

    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    result = mi == ("c", "a")
    expected = np.array([False, False, True])
    tm.assert_numpy_array_equal(result, expected)

    result = mi == ("c",)
    expected = np.array([False, False, False])
    tm.assert_numpy_array_equal(result, expected)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def test_equals_multi(idx):
    """Check ``equals`` / ``equal_levels`` against differing MultiIndexes.

    Covers a different number of levels, different level values, and the
    same levels with different codes.
    """
    assert idx.equals(idx)
    assert not idx.equals(idx.values)
    assert idx.equals(Index(idx.values))

    assert idx.equal_levels(idx)
    assert not idx.equals(idx[:-1])
    assert not idx.equals(idx[-1])

    # different number of levels
    index = MultiIndex(
        levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )

    index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
    assert not index.equals(index2)
    assert not index.equal_levels(index2)

    # levels are different
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 2, 3])
    minor_codes = np.array([0, 1, 0, 0, 1, 0])

    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )
    assert not idx.equals(index)
    assert not idx.equal_levels(index)

    # some of the labels are different
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])

    major_codes = np.array([0, 0, 2, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )
    assert not idx.equals(index)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def test_identical(idx):
    """Check that ``identical`` is stricter than ``equals``.

    Different names, or a flat ``Index`` of the same tuples, stay equal but
    are not identical.
    """
    mi = idx.copy()
    mi2 = idx.copy()
    assert mi.identical(mi2)

    # Renaming one side keeps equality but breaks identity.
    mi = mi.set_names(["new1", "new2"])
    assert mi.equals(mi2)
    assert not mi.identical(mi2)

    mi2 = mi2.set_names(["new1", "new2"])
    assert mi.identical(mi2)

    # A flat Index of tuples is equal but not identical.
    mi4 = Index(mi.tolist(), tupleize_cols=False)
    assert not mi.identical(mi4)
    assert mi.equals(mi4)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def test_equals_operator(idx):
    """Element-wise ``==`` of an index with itself is True everywhere."""
    # GH9785
    assert (idx == idx).all()
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def test_equals_missing_values():
    """A one-row slice must not compare equal to the item at that position."""
    # make sure take is not using -1
    i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    result = i[0:1].equals(i[0])
    assert not result
    result = i[1:2].equals(i[1])
    assert not result
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def test_equals_missing_values_differently_sorted():
    """Indexes holding the same NaN tuples are equal only in the same order."""
    # GH#38439
    mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not mi1.equals(mi2)

    mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert mi1.equals(mi2)
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def test_is_():
    """Exercise ``MultiIndex.is_`` across views, renames and level changes."""
    mi = MultiIndex.from_tuples(zip(range(10), range(10)))
    assert mi.is_(mi)
    assert mi.is_(mi.view())
    assert mi.is_(mi.view().view().view().view())
    mi2 = mi.view()
    # names are metadata, they don't change id
    mi2.names = ["A", "B"]
    assert mi2.is_(mi)
    assert mi.is_(mi2)

    assert not mi.is_(mi.set_names(["C", "D"]))
    # levels are inherent properties, they change identity
    mi3 = mi2.set_levels([list(range(10)), list(range(10))])
    assert not mi3.is_(mi2)
    # shouldn't change
    assert mi2.is_(mi)
    mi4 = mi3.view()

    # GH 17464 - Remove duplicate MultiIndex levels
    mi4 = mi4.set_levels([list(range(10)), list(range(10))])
    assert not mi4.is_(mi3)
    mi5 = mi.view()
    mi5 = mi5.set_levels(mi5.levels)
    assert not mi5.is_(mi)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def test_is_all_dates(idx):
    """The ``idx`` fixture must not report ``_is_all_dates``."""
    assert not idx._is_all_dates
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def test_is_numeric(idx):
    """``is_any_real_numeric_dtype`` must be False for a MultiIndex."""
    # MultiIndex is never numeric
    assert not is_any_real_numeric_dtype(idx)
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def test_multiindex_compare():
    """Compare a single-level MultiIndex with itself using ``==`` and ``>``."""
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1

    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    expected = Series([True, True])
    result = Series(midx == midx)
    tm.assert_series_equal(result, expected)

    # Greater than comparison: MultiIndex object vs self
    expected = Series([False, False])
    result = Series(midx > midx)
    tm.assert_series_equal(result, expected)
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def test_equals_ea_int_regular_int():
    """An ``Int64`` level and a plain-integer level must not compare equal."""
    # GH#46026
    mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
    mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]])
    assert not mi1.equals(mi2)
    assert not mi2.equals(mi1)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_formats.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pandas import (
|
| 6 |
+
Index,
|
| 7 |
+
MultiIndex,
|
| 8 |
+
)
|
| 9 |
+
import pandas._testing as tm
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_format(idx):
    """``MultiIndex.format`` must emit the deprecation ``FutureWarning``.

    Both the full index and an empty slice are formatted inside the
    warning context.
    """
    msg = "MultiIndex.format is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        idx.format()
        idx[:0].format()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def test_format_integer_names():
|
| 20 |
+
index = MultiIndex(
|
| 21 |
+
levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
|
| 22 |
+
)
|
| 23 |
+
msg = "MultiIndex.format is deprecated"
|
| 24 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 25 |
+
index.format(names=True)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def test_format_sparse_config(idx):
    """With ``display.multi_sparse`` off, repeated outer labels are printed."""
    # GH1538
    msg = "MultiIndex.format is deprecated"
    with pd.option_context("display.multi_sparse", False):
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = idx.format()
    # NOTE(review): the two-space column gap is restored on the assumption
    # that the scrape collapsed it -- confirm against upstream pandas.
    assert result[1] == "foo  two"
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def test_format_sparse_display():
|
| 38 |
+
index = MultiIndex(
|
| 39 |
+
levels=[[0, 1], [0, 1], [0, 1], [0]],
|
| 40 |
+
codes=[
|
| 41 |
+
[0, 0, 0, 1, 1, 1],
|
| 42 |
+
[0, 0, 1, 0, 0, 1],
|
| 43 |
+
[0, 1, 0, 0, 1, 0],
|
| 44 |
+
[0, 0, 0, 0, 0, 0],
|
| 45 |
+
],
|
| 46 |
+
)
|
| 47 |
+
msg = "MultiIndex.format is deprecated"
|
| 48 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 49 |
+
result = index.format()
|
| 50 |
+
assert result[3] == "1 0 0 0"
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def test_repr_with_unicode_data():
    """``repr`` of an index holding a non-ASCII label contains no backslash."""
    with pd.option_context("display.encoding", "UTF-8"):
        d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        index = pd.DataFrame(d).set_index(["a", "b"]).index
        assert "\\" not in repr(index)  # we don't want unicode-escaped
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def test_repr_roundtrip_raises():
    """Evaluating ``repr(mi)`` raises ``TypeError`` instead of rebuilding it."""
    mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        # eval is applied only to a repr produced two lines above.
        eval(repr(mi))
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def test_unicode_string_with_unicode():
    """Smoke test: ``str`` of an index with a non-ASCII label does not raise."""
    d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    idx = pd.DataFrame(d).set_index(["a", "b"]).index
    str(idx)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def test_repr_max_seq_item_setting(idx):
    """With ``display.max_seq_items`` set to None the output is not elided."""
    # GH10182
    idx = idx.repeat(50)
    with pd.option_context("display.max_seq_items", None):
        repr(idx)
        assert "..." not in str(idx)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class TestRepr:
    """Exact-output tests for ``MultiIndex.__repr__`` / ``__str__``.

    NOTE(review): the alignment whitespace inside the expected strings was
    lost in the scraped source and has been rebuilt here (continuation rows
    under the first tuple, attribute line under the opening parenthesis,
    right-justified tuple fields) -- confirm against upstream pandas.
    """

    def test_unicode_repr_issues(self):
        """Smoke test: repr of levels / level values with non-ASCII labels."""
        levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
        codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
        index = MultiIndex(levels=levels, codes=codes)

        repr(index.levels)
        repr(index.get_level_values(1))

    def test_repr_max_seq_items_equal_to_n(self, idx):
        """No truncation when ``max_seq_items`` is 6 and six rows are shown."""
        # display.max_seq_items == n
        with pd.option_context("display.max_seq_items", 6):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ('bar', 'one'),
            ('baz', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])"""
            assert result == expected

    def test_repr(self, idx):
        """Check repr of a one-row slice, the full index, and truncated forms."""
        result = idx[:1].__repr__()
        expected = """\
MultiIndex([('foo', 'one')],
           names=['first', 'second'])"""
        assert result == expected

        result = idx.__repr__()
        expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ('bar', 'one'),
            ('baz', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])"""
        assert result == expected

        with pd.option_context("display.max_seq_items", 5):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ...
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'], length=6)"""
            assert result == expected

        # display.max_seq_items == 1
        with pd.option_context("display.max_seq_items", 1):
            result = idx.__repr__()
            expected = """\
MultiIndex([...
            ('qux', 'two')],
           names=['first', ...], length=6)"""
            assert result == expected

    def test_rjust(self):
        """Tuple fields are right-justified when their widths differ."""
        n = 1000
        ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
        dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
        mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"])
        result = mi[:1].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
           names=['a', 'b', 'dti'])"""
        assert result == expected

        result = mi[::500].__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00'),
            (  'a',  9, '2000-01-01 00:08:20'),
            ('abc', 10, '2000-01-01 00:16:40'),
            ('abc', 10, '2000-01-01 00:25:00')],
           names=['a', 'b', 'dti'])"""
        assert result == expected

        result = mi.__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00'),
            (  'a',  9, '2000-01-01 00:00:01'),
            (  'a',  9, '2000-01-01 00:00:02'),
            (  'a',  9, '2000-01-01 00:00:03'),
            (  'a',  9, '2000-01-01 00:00:04'),
            (  'a',  9, '2000-01-01 00:00:05'),
            (  'a',  9, '2000-01-01 00:00:06'),
            (  'a',  9, '2000-01-01 00:00:07'),
            (  'a',  9, '2000-01-01 00:00:08'),
            (  'a',  9, '2000-01-01 00:00:09'),
            ...
            ('abc', 10, '2000-01-01 00:33:10'),
            ('abc', 10, '2000-01-01 00:33:11'),
            ('abc', 10, '2000-01-01 00:33:12'),
            ('abc', 10, '2000-01-01 00:33:13'),
            ('abc', 10, '2000-01-01 00:33:14'),
            ('abc', 10, '2000-01-01 00:33:15'),
            ('abc', 10, '2000-01-01 00:33:16'),
            ('abc', 10, '2000-01-01 00:33:17'),
            ('abc', 10, '2000-01-01 00:33:18'),
            ('abc', 10, '2000-01-01 00:33:19')],
           names=['a', 'b', 'dti'], length=2000)"""
        assert result == expected

    def test_tuple_width(self):
        """Wide tuples are elided with ``...`` after the fourth field."""
        n = 1000
        ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
        dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
        levels = [ci, ci.codes + 9, dti, dti, dti]
        names = ["a", "b", "dti_1", "dti_2", "dti_3"]
        mi = MultiIndex.from_arrays(levels, names=names)
        result = mi[:1].__repr__()
        expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""  # noqa: E501
        assert result == expected

        result = mi[:10].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
            ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
            ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
            ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
            ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
            ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
            ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
            ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
            ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
            ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
        assert result == expected

        result = mi.__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
            (  'a',  9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
            (  'a',  9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
            (  'a',  9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
            (  'a',  9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
            (  'a',  9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
            (  'a',  9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
            (  'a',  9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
            (  'a',  9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
            (  'a',  9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
            ...
            ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
            ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
            ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
            ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
            ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
            ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
            ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
            ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
            ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
            ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
        assert result == expected

    def test_multiindex_long_element(self):
        """Check ``str`` of an index holding a single 62-character label."""
        # Non-regression test towards GH#52960
        data = MultiIndex.from_tuples([("c" * 62,)])

        expected = (
            "MultiIndex([('cccccccccccccccccccccccccccccccccccccccc"
            "cccccccccccccccccccccc',)],\n           )"
        )
        assert str(data) == expected
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_integrity.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas._libs import index as libindex
|
| 7 |
+
|
| 8 |
+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from pandas import (
|
| 12 |
+
Index,
|
| 13 |
+
IntervalIndex,
|
| 14 |
+
MultiIndex,
|
| 15 |
+
RangeIndex,
|
| 16 |
+
)
|
| 17 |
+
import pandas._testing as tm
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def test_labels_dtypes():
    """Check the integer dtype of codes for levels of increasing size."""
    # GH 8456
    i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert i.codes[0].dtype == "int8"
    assert i.codes[1].dtype == "int8"

    i = MultiIndex.from_product([["a"], range(40)])
    assert i.codes[1].dtype == "int8"
    i = MultiIndex.from_product([["a"], range(400)])
    assert i.codes[1].dtype == "int16"
    i = MultiIndex.from_product([["a"], range(40000)])
    assert i.codes[1].dtype == "int32"

    # Codes must stay non-negative for every row.
    i = MultiIndex.from_product([["a"], range(1000)])
    assert (i.codes[0] >= 0).all()
    assert (i.codes[1] >= 0).all()
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def test_values_boxed():
    """``.values`` returns an object array equal to the input tuples."""
    tuples = [
        (1, pd.Timestamp("2000-01-01")),
        (2, pd.NaT),
        (3, pd.Timestamp("2000-01-03")),
        (1, pd.Timestamp("2000-01-04")),
        (2, pd.Timestamp("2000-01-02")),
        (3, pd.Timestamp("2000-01-03")),
    ]
    result = MultiIndex.from_tuples(tuples)
    expected = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(result.values, expected)
    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def test_values_multiindex_datetimeindex():
    """Round-trip naive and tz-aware datetime levels through ``.values``."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(ints)

    aware = pd.DatetimeIndex(ints, tz="US/Central")

    idx = MultiIndex.from_arrays([naive, aware])
    result = idx.values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive)

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware)

    # n_lev > n_lab
    result = idx[:2].values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive[:2])

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware[:2])
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def test_values_multiindex_periodindex():
    """Round-trip an integer level and a period level through ``.values``."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")

    idx = MultiIndex.from_arrays([ints, pidx])
    result = idx.values

    outer = Index([x[0] for x in result])
    tm.assert_index_equal(outer, Index(ints, dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx)

    # n_lev > n_lab
    result = idx[:2].values

    outer = Index([x[0] for x in result])
    tm.assert_index_equal(outer, Index(ints[:2], dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx[:2])
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def test_consistency():
    """Build a 70000-row index, then one with a repeated entry.

    The first construction only has to succeed; the second index contains
    the pair (1, 1) twice and must report ``is_unique`` as False.
    """
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))

    major_codes = np.arange(70000)
    minor_codes = np.repeat(range(10), 7000)

    # the fact that it works means it's consistent
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    # inconsistent
    major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    assert index.is_unique is False
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
@pytest.mark.slow
def test_hash_collisions(monkeypatch):
    """Look up every entry of an 8x8 product index with the cutoff patched.

    ``libindex._SIZE_CUTOFF`` is temporarily set to 50, below the 64 rows
    of the index under test.
    """
    # non-smoke test that we don't get hash collisions
    size_cutoff = 50
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        index = MultiIndex.from_product(
            [np.arange(8), np.arange(8)], names=["one", "two"]
        )
        result = index.get_indexer(index.values)
        tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp"))

        # Spot-check scalar lookups at both ends of the index.
        for i in [0, 1, len(index) - 2, len(index) - 1]:
            result = index.get_loc(index[i])
            assert result == i
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def test_dims():
    """Placeholder test with no assertions."""
    pass
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def test_take_invalid_kwargs():
    """``take`` rejects an unknown keyword and the ``out`` / ``mode`` options."""
    vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
    idx = MultiIndex.from_product(vals, names=["str", "dt"])
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def test_isna_behavior(idx):
    """``pd.isna`` on a MultiIndex raises ``NotImplementedError``."""
    # should not segfault GH5123
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    msg = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=msg):
        pd.isna(idx)
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def test_large_multiindex_error(monkeypatch):
    """Missing keys raise ``KeyError`` on both sides of the size cutoff.

    ``libindex._SIZE_CUTOFF`` is patched to 50 and frames are built with
    ``size_cutoff - 1`` and ``size_cutoff + 1`` inner values.
    """
    # GH12527
    size_cutoff = 50
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        df_below_cutoff = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(size_cutoff - 1)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            df_below_cutoff.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            df_below_cutoff.loc[(3, 0), "dest"]
        df_above_cutoff = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(size_cutoff + 1)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            df_above_cutoff.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            df_above_cutoff.loc[(3, 0), "dest"]
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def test_mi_hashtable_populated_attribute_error(monkeypatch):
    """A missing Series attribute raises the usual ``AttributeError``.

    The frame is indexed by a 50-row MultiIndex with
    ``libindex._SIZE_CUTOFF`` patched to 50.
    """
    # GH 18165
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
    r = range(50)
    df = pd.DataFrame({"a": r, "b": r}, index=MultiIndex.from_arrays([r, r]))

    msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=msg):
        df["a"].foo()
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def test_can_hold_identifiers(idx):
    """``_can_hold_identifiers_and_holds_name`` is True for the first entry."""
    key = idx[0]
    assert idx._can_hold_identifiers_and_holds_name(key) is True
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def test_metadata_immutable(idx):
    """Item assignment on levels, codes and names must raise."""
    levels, codes = idx.levels, idx.codes
    # shouldn't be able to set at either the top level or base level
    mutable_regex = re.compile("does not support mutable operations")
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0][0] = levels[0][0]
    # ditto for labels
    with pytest.raises(TypeError, match=mutable_regex):
        codes[0] = codes[0]
    # Writing into an individual codes array raises ValueError instead.
    with pytest.raises(ValueError, match="assignment destination is read-only"):
        codes[0][0] = codes[0][0]
    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=mutable_regex):
        names[0] = names[0]
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def test_level_setting_resets_attributes():
    """``set_levels`` must yield a fresh ``is_monotonic_increasing`` value."""
    ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert ind.is_monotonic_increasing
    ind = ind.set_levels([["A", "B"], [1, 3, 2]])
    # if this fails, probably didn't reset the cache correctly.
    assert not ind.is_monotonic_increasing
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def test_rangeindex_fallback_coercion_bug():
|
| 244 |
+
# GH 12893
|
| 245 |
+
df1 = pd.DataFrame(np.arange(100).reshape((10, 10)))
|
| 246 |
+
df2 = pd.DataFrame(np.arange(100).reshape((10, 10)))
|
| 247 |
+
df = pd.concat(
|
| 248 |
+
{"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)},
|
| 249 |
+
axis=1,
|
| 250 |
+
)
|
| 251 |
+
df.index.names = ["fizz", "buzz"]
|
| 252 |
+
|
| 253 |
+
expected = pd.DataFrame(
|
| 254 |
+
{"df2": np.arange(100), "df1": np.arange(100)},
|
| 255 |
+
index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
|
| 256 |
+
)
|
| 257 |
+
tm.assert_frame_equal(df, expected, check_like=True)
|
| 258 |
+
|
| 259 |
+
result = df.index.get_level_values("fizz")
|
| 260 |
+
expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10)
|
| 261 |
+
tm.assert_index_equal(result, expected)
|
| 262 |
+
|
| 263 |
+
result = df.index.get_level_values("buzz")
|
| 264 |
+
expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz")
|
| 265 |
+
tm.assert_index_equal(result, expected)
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def test_memory_usage(idx):
    """Compare ``memory_usage`` before and after a ``get_loc`` lookup.

    A zero-length index is instead expected to report 0.
    """
    result = idx.memory_usage()
    if len(idx):
        idx.get_loc(idx[0])
        result2 = idx.memory_usage()
        result3 = idx.memory_usage(deep=True)

        # RangeIndex, IntervalIndex
        # don't have engines
        if not isinstance(idx, (RangeIndex, IntervalIndex)):
            assert result2 > result

        if idx.inferred_type == "object":
            assert result3 > result2

    else:
        # we report 0 for no-length
        assert result == 0
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def test_nlevels(idx):
    """The ``idx`` fixture has exactly two levels."""
    assert idx.nlevels == 2
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_join.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
DataFrame,
|
| 6 |
+
Index,
|
| 7 |
+
Interval,
|
| 8 |
+
MultiIndex,
|
| 9 |
+
Series,
|
| 10 |
+
StringDtype,
|
| 11 |
+
)
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@pytest.mark.parametrize(
    "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
    """Join a flat Index with a MultiIndex on the level named "second".

    For outer and inner joins the reversed call must give the same index
    and the same indexers.
    """
    join_index, lidx, ridx = other.join(
        idx, how=join_type, level="second", return_indexers=True
    )

    exp_level = other.join(idx.levels[1], how=join_type)
    assert join_index.levels[0].equals(idx.levels[0])
    assert join_index.levels[1].equals(exp_level)

    # pare down levels
    mask = np.array([x[1] in exp_level for x in idx], dtype=bool)
    exp_values = idx.values[mask]
    tm.assert_numpy_array_equal(join_index.values, exp_values)

    if join_type in ("outer", "inner"):
        # Caller and argument are swapped, so the indexers come back in
        # swapped order and are unpacked accordingly.
        join_index2, ridx2, lidx2 = idx.join(
            other, how=join_type, level="second", return_indexers=True
        )

        assert join_index.equals(join_index2)
        tm.assert_numpy_array_equal(lidx, lidx2)
        tm.assert_numpy_array_equal(ridx, ridx2)
        tm.assert_numpy_array_equal(join_index2.values, exp_values)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def test_join_level_corner_case(idx):
    """Level joins: the default join returns a MultiIndex; MI-on-MI raises."""
    # some corner cases
    index = Index(["three", "one", "two"])
    result = index.join(idx, level="second")
    assert isinstance(result, MultiIndex)

    with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
        idx.join(idx, level=1)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def test_join_self(idx, join_type):
    """Joining an index with itself returns it (sorted for an outer join)."""
    joined = idx.join(idx, how=join_type)
    expected = idx.sort_values() if join_type == "outer" else idx
    tm.assert_index_equal(joined, expected)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def test_join_multi():
    """Join a MultiIndex with a flat Index that shares one level name."""
    # GH 10665
    multi = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
    flat = Index([1, 2, 5], name="b")

    # inner
    inner_expected = MultiIndex.from_product(
        [np.arange(4), [1, 2]], names=["a", "b"]
    )
    inner_multi_indexer = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    inner_flat_indexer = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)

    joined, multi_indexer, flat_indexer = multi.join(
        flat, how="inner", return_indexers=True
    )
    tm.assert_index_equal(joined, inner_expected)
    tm.assert_numpy_array_equal(multi_indexer, inner_multi_indexer)
    tm.assert_numpy_array_equal(flat_indexer, inner_flat_indexer)

    # flip
    joined, flat_indexer, multi_indexer = flat.join(
        multi, how="inner", return_indexers=True
    )
    tm.assert_index_equal(joined, inner_expected)
    tm.assert_numpy_array_equal(multi_indexer, inner_multi_indexer)
    tm.assert_numpy_array_equal(flat_indexer, inner_flat_indexer)

    # keep MultiIndex
    outer_flat_indexer = np.array([-1, 0, 1, -1] * 4, dtype=np.intp)

    joined, multi_indexer, flat_indexer = multi.join(
        flat, how="left", return_indexers=True
    )
    tm.assert_index_equal(joined, multi)
    assert multi_indexer is None
    tm.assert_numpy_array_equal(flat_indexer, outer_flat_indexer)

    # flip
    joined, flat_indexer, multi_indexer = flat.join(
        multi, how="right", return_indexers=True
    )
    tm.assert_index_equal(joined, multi)
    assert multi_indexer is None
    tm.assert_numpy_array_equal(flat_indexer, outer_flat_indexer)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def test_join_multi_wrong_order():
    """Join two MultiIndexes whose level names are the same but swapped."""
    # GH 25760
    # GH 28956
    left = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"])

    joined, left_indexer, right_indexer = left.join(right, return_indexers=True)

    tm.assert_index_equal(left, joined)
    assert left_indexer is None

    expected_right_indexer = np.array([-1, -1, -1, -1], dtype=np.intp)
    tm.assert_numpy_array_equal(right_indexer, expected_right_indexer)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def test_join_multi_return_indexers():
    """With return_indexers=False the join yields just the index."""
    # GH 34074
    three_level = MultiIndex.from_product(
        [[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"]
    )
    two_level = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])

    joined = three_level.join(two_level, return_indexers=False)
    tm.assert_index_equal(joined, three_level)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def test_join_overlapping_interval_level():
    """Outer-join two MultiIndexes holding the same (num, Interval) pairs."""
    # GH 44096
    names = ["num", "interval"]

    ordered_tuples = [
        (1, Interval(0.0, 1.0)),
        (1, Interval(1.0, 2.0)),
        (1, Interval(2.0, 5.0)),
        (2, Interval(0.0, 1.0)),
        (2, Interval(1.0, 3.0)),  # interval limit is here at 3.0, not at 2.0
        (2, Interval(3.0, 5.0)),
    ]
    # Same pairs as above, listed in a different order.
    shuffled_tuples = [
        (1, Interval(2.0, 5.0)),
        (1, Interval(0.0, 1.0)),
        (1, Interval(1.0, 2.0)),
        (2, Interval(3.0, 5.0)),
        (2, Interval(0.0, 1.0)),
        (2, Interval(1.0, 3.0)),
    ]

    left = MultiIndex.from_tuples(ordered_tuples, names=names)
    right = MultiIndex.from_tuples(shuffled_tuples, names=names)
    expected = MultiIndex.from_tuples(ordered_tuples, names=names)

    joined = left.join(right, how="outer")
    tm.assert_index_equal(joined, expected)
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def test_join_midx_ea():
    """Inner-join MultiIndexes with Int64 levels that share only level "a"."""
    # GH#49277

    def int64(values):
        return Series(values, dtype="Int64")

    left = MultiIndex.from_arrays(
        [int64([1, 1, 3]), int64([1, 2, 3])],
        names=["a", "b"],
    )
    right = MultiIndex.from_arrays([int64([1]), int64([3])], names=["a", "c"])

    expected = MultiIndex.from_arrays(
        [int64([1, 1]), int64([1, 2]), int64([3, 3])],
        names=["a", "b", "c"],
    )
    tm.assert_index_equal(left.join(right, how="inner"), expected)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def test_join_midx_string():
    """Inner-join MultiIndexes with StringDtype levels sharing only level "a"."""
    # GH#49277

    def string_series(values):
        return Series(values, dtype=StringDtype())

    left = MultiIndex.from_arrays(
        [string_series(["a", "a", "c"]), string_series(["a", "b", "c"])],
        names=["a", "b"],
    )
    right = MultiIndex.from_arrays(
        [string_series(["a"]), string_series(["c"])],
        names=["a", "c"],
    )

    expected = MultiIndex.from_arrays(
        [
            string_series(["a", "a"]),
            string_series(["a", "b"]),
            string_series(["c", "c"]),
        ],
        names=["a", "b", "c"],
    )
    tm.assert_index_equal(left.join(right, how="inner"), expected)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def test_join_multi_with_nan():
    """DataFrame.join on MultiIndexes where the right index contains NaN."""
    # GH29252
    names = ["id1", "id2"]
    left_index = MultiIndex.from_product([["A"], [1.0, 2.0]], names=names)
    right_index = MultiIndex.from_product([["A"], [np.nan, 2.0]], names=names)

    left = DataFrame(data={"col1": [1.1, 1.2]}, index=left_index)
    right = DataFrame(data={"col2": [2.1, 2.2]}, index=right_index)
    joined = left.join(right)

    # Only the ("A", 2.0) row has a partner on the right side.
    expected = DataFrame(
        data={"col1": [1.1, 1.2], "col2": [np.nan, 2.2]},
        index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=names),
    )
    tm.assert_frame_equal(joined, expected)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
@pytest.mark.parametrize("val", [0, 5])
def test_join_dtypes(any_numeric_ea_dtype, val):
    """Outer-join MultiIndexes whose first level uses the parametrized dtype."""
    # GH#49830

    def as_ea(values):
        return Series(values, dtype=any_numeric_ea_dtype)

    left = MultiIndex.from_arrays([as_ea([1, 2]), [3, 4]])
    right = MultiIndex.from_arrays([as_ea([1, val, val]), [3, 4, 4]])
    joined = left.join(right, how="outer")

    unsorted_expected = MultiIndex.from_arrays(
        [as_ea([val, val, 1, 2]), [4, 4, 3, 4]]
    )
    tm.assert_index_equal(joined, unsorted_expected.sort_values())
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def test_join_dtypes_all_nan(any_numeric_ea_dtype):
    """Outer-join MultiIndexes whose second level consists only of NaN."""
    # GH#49830

    def as_ea(values):
        return Series(values, dtype=any_numeric_ea_dtype)

    left = MultiIndex.from_arrays([as_ea([1, 2]), [np.nan, np.nan]])
    right = MultiIndex.from_arrays([as_ea([1, 0, 0]), [np.nan, np.nan, np.nan]])
    joined = left.join(right, how="outer")

    expected = MultiIndex.from_arrays(
        [as_ea([0, 0, 1, 2]), [np.nan, np.nan, np.nan, np.nan]]
    )
    tm.assert_index_equal(joined, expected)
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def test_join_index_levels():
|
| 260 |
+
# GH#53093
|
| 261 |
+
midx = midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")])
|
| 262 |
+
midx2 = MultiIndex.from_tuples([("a", "2019-01-31")])
|
| 263 |
+
result = midx.join(midx2, how="outer")
|
| 264 |
+
expected = MultiIndex.from_tuples(
|
| 265 |
+
[("a", "2019-01-31"), ("a", "2019-02-01"), ("a", "2019-02-01")]
|
| 266 |
+
)
|
| 267 |
+
tm.assert_index_equal(result.levels[1], expected.levels[1])
|
| 268 |
+
tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_lexsort.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pandas import MultiIndex
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class TestIsLexsorted:
    """Checks for MultiIndex._is_lexsorted and _lexsort_depth."""

    def test_is_lexsorted(self):
        levels = [[0, 1], [0, 1, 2]]

        # Codes are in order on both levels.
        fully_sorted = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
        )
        assert fully_sorted._is_lexsorted()

        # First level in order, second level out of order.
        second_unsorted = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
        )
        assert not second_unsorted._is_lexsorted()

        # First level itself out of order.
        first_unsorted = MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
        )
        assert not first_unsorted._is_lexsorted()
        assert first_unsorted._lexsort_depth == 0
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class TestLexsortDepth:
    """Checks for MultiIndex._lexsort_depth when sortorder is supplied."""

    def test_lexsort_depth(self):
        # _lexsort_depth should return the sortorder that was passed to the
        # MultiIndex constructor.
        # GH#28518
        levels = [[0, 1], [0, 1, 2]]

        cases = [
            ([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], 2),
            ([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], 1),
            ([[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], 0),
        ]
        for codes, sortorder in cases:
            index = MultiIndex(levels=levels, codes=codes, sortorder=sortorder)
            assert index._lexsort_depth == sortorder
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_monotonic.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Index,
|
| 6 |
+
MultiIndex,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex):
    """The fixture index is neither monotonic nor strictly monotonic increasing."""
    # string ordering
    mi = lexsorted_two_level_string_multiindex
    as_flat = Index(mi.values)

    assert mi.is_monotonic_increasing is False
    assert as_flat.is_monotonic_increasing is False
    assert mi._is_strictly_monotonic_increasing is False
    assert Index(mi.values)._is_strictly_monotonic_increasing is False
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def test_is_monotonic_increasing():
    """Check is_monotonic_increasing / _is_strictly_monotonic_increasing.

    Each scenario is checked on the MultiIndex itself and, except for the
    mixed-levels case, on ``Index(i.values)`` built from its tuples.
    """
    i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"])
    assert i.is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    # Previously this line re-checked ``i`` instead of ``Index(i.values)``.
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # first level decreasing
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # second level decreasing
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # NaN in the first level
    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # string levels, codes in increasing order
    i = MultiIndex(
        levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [1, 2, 3, 4],
            [
                "gb00b03mlx29",
                "lu0197800237",
                "nl0000289783",
                "nl0000289965",
                "nl0000301109",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def test_is_monotonic_decreasing():
    """Check is_monotonic_decreasing / _is_strictly_monotonic_decreasing.

    Each scenario is checked on the MultiIndex itself and, except for the
    mixed-levels case, on ``Index(i.values)`` built from its tuples.
    """
    i = MultiIndex.from_product(
        [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    # Previously this line re-checked ``i`` instead of ``Index(i.values)``.
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # first level increasing
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # second level increasing
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # NaN in the first level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string ordering
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def test_is_strictly_monotonic_increasing():
    """A repeated entry keeps the index monotonic but not strictly so."""
    with_duplicate = MultiIndex(
        levels=[["bar", "baz"], ["mom", "next"]],
        codes=[[0, 0, 1, 1], [0, 0, 0, 1]],
    )
    assert with_duplicate.is_monotonic_increasing is True
    assert with_duplicate._is_strictly_monotonic_increasing is False
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def test_is_strictly_monotonic_decreasing():
    """A repeated entry keeps the index monotonic but not strictly so."""
    with_duplicate = MultiIndex(
        levels=[["baz", "bar"], ["next", "mom"]],
        codes=[[0, 0, 1, 1], [0, 0, 0, 1]],
    )
    assert with_duplicate.is_monotonic_decreasing is True
    assert with_duplicate._is_strictly_monotonic_decreasing is False
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"])
@pytest.mark.parametrize(
    "values",
    [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]],
)
def test_is_monotonic_with_nans(values, attr):
    """A single-level MultiIndex containing NaN is monotonic in neither direction."""
    # GH: 37220
    with_nan = MultiIndex.from_tuples(values, names=["test"])
    monotonic = getattr(with_nan, attr)
    assert monotonic is False
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reindex.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pandas import (
|
| 6 |
+
Index,
|
| 7 |
+
MultiIndex,
|
| 8 |
+
)
|
| 9 |
+
import pandas._testing as tm
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_reindex(idx):
    """reindex with a list of tuples keeps the MultiIndex type and names."""
    expected_names = ["first", "second"]

    # Reindex to the first four entries.
    partial, indexer = idx.reindex(list(idx[:4]))
    assert isinstance(partial, MultiIndex)
    assert partial.names == expected_names
    assert [level.name for level in partial.levels] == expected_names

    # Reindex to all entries: no indexer is returned.
    complete, indexer = idx.reindex(list(idx))
    assert isinstance(complete, MultiIndex)
    assert indexer is None
    assert complete.names == expected_names
    assert [level.name for level in complete.levels] == expected_names
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def test_reindex_level(idx):
    """Level-wise reindex agrees with the corresponding level join."""
    flat = Index(["one"])

    mi_target, mi_indexer = idx.reindex(flat, level="second")
    flat_target, flat_indexer = flat.reindex(idx, level="second")

    right_join = idx.join(flat, level="second", how="right")
    left_join = idx.join(flat, level="second", how="left")

    assert mi_target.equals(right_join)
    tm.assert_numpy_array_equal(mi_indexer, np.array([0, 2, 4]), check_dtype=False)

    assert flat_target.equals(left_join)
    tm.assert_numpy_array_equal(
        flat_indexer, np.array([0, -1, 0, -1, 0, -1]), check_dtype=False
    )

    # A fill method cannot be combined with level.
    with pytest.raises(TypeError, match="Fill method not supported"):
        idx.reindex(idx, method="pad", level="second")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
    """Reindexing to a list or ndarray target keeps the names of the source."""
    # GH6552
    idx = idx.copy()
    target = idx.copy()
    target.names = [None, None]

    other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])

    # First with unnamed levels, then with named ones.
    for names in ([None, None], ["foo", "bar"]):
        idx.names = names

        # list & ndarray cases
        assert idx.reindex([])[0].names == names
        assert idx.reindex(np.array([]))[0].names == names
        assert idx.reindex(target.tolist())[0].names == names
        assert idx.reindex(target.values)[0].names == names
        assert idx.reindex(other_dtype.tolist())[0].names == names
        assert idx.reindex(other_dtype.values)[0].names == names
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise reindex to an empty list keeps the level names."""
    # GH7774
    names = ["foo", "bar"]
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]], names=names)
    for level in (0, 1):
        reindexed, _ = mi.reindex([], level=level)
        assert reindexed.names == ["foo", "bar"]
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(
    using_infer_string,
):
    """Level-wise reindex to an empty list keeps each level's dtype."""
    # GH7774
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]])
    assert mi.reindex([], level=0)[0].levels[0].dtype.type == np.int64

    # The expected type of the string level depends on using_infer_string.
    expected_type = str if using_infer_string else np.object_
    assert mi.reindex([], level=1)[0].levels[1].dtype.type == expected_type

    # case with EA levels
    cat = pd.Categorical(["foo", "bar"])
    dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
    ea_mi = MultiIndex.from_product([cat, dti])
    assert ea_mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype
    assert ea_mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def test_reindex_base(idx):
    """get_indexer against itself gives 0..n-1; unknown methods raise."""
    positions = np.arange(idx.size, dtype=np.intp)
    tm.assert_numpy_array_equal(positions, idx.get_indexer(idx))

    with pytest.raises(ValueError, match="Invalid fill method"):
        idx.get_indexer(idx, method="invalid")
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def test_reindex_non_unique():
    """Reindexing a Series with a non-unique MultiIndex raises ValueError."""
    duplicated = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    ser = pd.Series(np.arange(4), index=duplicated)
    unique_target = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    with pytest.raises(ValueError, match="cannot handle a non-unique multi-index!"):
        ser.reindex(unique_target)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """Level-0 reindex to a label not present yields an empty MultiIndex."""
    # GH41170
    source = MultiIndex.from_arrays(values)
    reindexed, indexer = source.reindex(np.array(["b"]), level=0)

    expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    # The empty expected indexer takes on whatever dtype was returned.
    empty_indexer = np.array([], dtype=indexer.dtype)

    tm.assert_index_equal(reindexed, expected)
    tm.assert_numpy_array_equal(indexer, empty_indexer)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def test_reindex_not_all_tuples():
    """Reindex a MultiIndex to an Index whose last entry is not a tuple."""
    keys = [("i", "i"), ("i", "j"), ("j", "i"), "j"]
    source = MultiIndex.from_tuples(keys[:-1])
    target = Index(keys)

    reindexed, indexer = source.reindex(target)

    tm.assert_index_equal(reindexed, target)
    # The trailing non-tuple key has no match in the source.
    tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2, -1], dtype=np.intp))
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def test_reindex_limit_arg_with_multiindex():
    """Passing limit without a fill method raises ValueError."""
    # GH21247
    source_index = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")])
    ser = pd.Series([0.02, 0.01, 0.012], index=source_index)

    target_tuples = [
        (3, "A"),
        (3, "B"),
        (4, "A"),
        (4, "B"),
        (4, "C"),
        (5, "B"),
        (5, "C"),
        (6, "B"),
        (6, "C"),
    ]
    target_index = MultiIndex.from_tuples(target_tuples)

    msg = "limit argument only valid if doing pad, backfill or nearest reindexing"
    with pytest.raises(ValueError, match=msg):
        ser.reindex(target_index, fill_value=0, limit=1)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def test_reindex_with_none_in_nested_multiindex():
    """reindex_like works when first-level labels are tuples containing None."""
    # GH42883
    forward = MultiIndex.from_tuples([(("a", None), 1), (("b", None), 2)])
    backward = MultiIndex.from_tuples([(("b", None), 2), (("a", None), 1)])
    source = pd.DataFrame([1, 2], index=forward)
    template = pd.DataFrame([2, 1], index=backward)

    # Reordering the source rows to the template's index gives the template.
    tm.assert_frame_equal(source.reindex_like(template), template)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reshape.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
import pytz
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from pandas import (
|
| 9 |
+
Index,
|
| 10 |
+
MultiIndex,
|
| 11 |
+
)
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def test_insert(idx):
    """MultiIndex.insert, plus row insertion through .loc on frame and series."""
    # key contained in all levels
    inserted = idx.insert(0, ("bar", "two"))
    assert inserted.equal_levels(idx)
    assert inserted[0] == ("bar", "two")

    # key not contained in all levels
    inserted = idx.insert(0, ("abc", "three"))

    expected_first = Index(list(idx.levels[0]) + ["abc"], name="first")
    tm.assert_index_equal(inserted.levels[0], expected_first)
    assert inserted.names == ["first", "second"]

    expected_second = Index(list(idx.levels[1]) + ["three"], name="second")
    tm.assert_index_equal(inserted.levels[1], expected_second)
    assert inserted[0] == ("abc", "three")

    # key wrong length
    with pytest.raises(
        ValueError, match="Item must have length equal to number of levels"
    ):
        idx.insert(0, ("foo2",))

    columns = ["1st", "2nd", "3rd"]
    initial_rows = [["a", "b", 0], ["b", "d", 1]]
    # (index key, value) pairs, assigned one by one in this order.
    additions = [
        (("b", "x"), 2),
        (("b", "a"), -1),
        (("b", "b"), 3),
        (("a", "x"), 4),
        (("a", "w"), 5),
        (("a", "a"), 6),
    ]

    left = pd.DataFrame(initial_rows, columns=columns)
    left.set_index(["1st", "2nd"], inplace=True)
    ts = left["3rd"].copy(deep=True)

    for key, value in additions:
        left.loc[key, "3rd"] = value

    for key, value in additions:
        ts.loc[key] = value

    right = pd.DataFrame(
        initial_rows + [[*key, value] for key, value in additions],
        columns=columns,
    )
    right.set_index(["1st", "2nd"], inplace=True)
    # FIXME data types changes to float because
    # of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right["3rd"])
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def test_insert2():
|
| 76 |
+
# GH9250
|
| 77 |
+
idx = (
|
| 78 |
+
[("test1", i) for i in range(5)]
|
| 79 |
+
+ [("test2", i) for i in range(6)]
|
| 80 |
+
+ [("test", 17), ("test", 18)]
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(idx[:-2]))
|
| 84 |
+
|
| 85 |
+
left.loc[("test", 17)] = 11
|
| 86 |
+
left.loc[("test", 18)] = 12
|
| 87 |
+
|
| 88 |
+
right = pd.Series(np.linspace(0, 12, 13), MultiIndex.from_tuples(idx))
|
| 89 |
+
|
| 90 |
+
tm.assert_series_equal(left, right)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def test_append(idx):
|
| 94 |
+
result = idx[:3].append(idx[3:])
|
| 95 |
+
assert result.equals(idx)
|
| 96 |
+
|
| 97 |
+
foos = [idx[:1], idx[1:3], idx[3:]]
|
| 98 |
+
result = foos[0].append(foos[1:])
|
| 99 |
+
assert result.equals(idx)
|
| 100 |
+
|
| 101 |
+
# empty
|
| 102 |
+
result = idx.append([])
|
| 103 |
+
assert result.equals(idx)
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def test_append_index():
|
| 107 |
+
idx1 = Index([1.1, 1.2, 1.3])
|
| 108 |
+
idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
|
| 109 |
+
idx3 = Index(["A", "B", "C"])
|
| 110 |
+
|
| 111 |
+
midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
|
| 112 |
+
midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])
|
| 113 |
+
|
| 114 |
+
result = idx1.append(midx_lv2)
|
| 115 |
+
|
| 116 |
+
# see gh-7112
|
| 117 |
+
tz = pytz.timezone("Asia/Tokyo")
|
| 118 |
+
expected_tuples = [
|
| 119 |
+
(1.1, tz.localize(datetime(2011, 1, 1))),
|
| 120 |
+
(1.2, tz.localize(datetime(2011, 1, 2))),
|
| 121 |
+
(1.3, tz.localize(datetime(2011, 1, 3))),
|
| 122 |
+
]
|
| 123 |
+
expected = Index([1.1, 1.2, 1.3] + expected_tuples)
|
| 124 |
+
tm.assert_index_equal(result, expected)
|
| 125 |
+
|
| 126 |
+
result = midx_lv2.append(idx1)
|
| 127 |
+
expected = Index(expected_tuples + [1.1, 1.2, 1.3])
|
| 128 |
+
tm.assert_index_equal(result, expected)
|
| 129 |
+
|
| 130 |
+
result = midx_lv2.append(midx_lv2)
|
| 131 |
+
expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
|
| 132 |
+
tm.assert_index_equal(result, expected)
|
| 133 |
+
|
| 134 |
+
result = midx_lv2.append(midx_lv3)
|
| 135 |
+
tm.assert_index_equal(result, expected)
|
| 136 |
+
|
| 137 |
+
result = midx_lv3.append(midx_lv2)
|
| 138 |
+
expected = Index._simple_new(
|
| 139 |
+
np.array(
|
| 140 |
+
[
|
| 141 |
+
(1.1, tz.localize(datetime(2011, 1, 1)), "A"),
|
| 142 |
+
(1.2, tz.localize(datetime(2011, 1, 2)), "B"),
|
| 143 |
+
(1.3, tz.localize(datetime(2011, 1, 3)), "C"),
|
| 144 |
+
]
|
| 145 |
+
+ expected_tuples,
|
| 146 |
+
dtype=object,
|
| 147 |
+
),
|
| 148 |
+
None,
|
| 149 |
+
)
|
| 150 |
+
tm.assert_index_equal(result, expected)
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
@pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)])
|
| 154 |
+
def test_append_names_match(name, exp):
|
| 155 |
+
# GH#48288
|
| 156 |
+
midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
|
| 157 |
+
midx2 = MultiIndex.from_arrays([[3], [5]], names=["a", name])
|
| 158 |
+
result = midx.append(midx2)
|
| 159 |
+
expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=["a", exp])
|
| 160 |
+
tm.assert_index_equal(result, expected)
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def test_append_names_dont_match():
|
| 164 |
+
# GH#48288
|
| 165 |
+
midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
|
| 166 |
+
midx2 = MultiIndex.from_arrays([[3], [5]], names=["x", "y"])
|
| 167 |
+
result = midx.append(midx2)
|
| 168 |
+
expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=None)
|
| 169 |
+
tm.assert_index_equal(result, expected)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def test_append_overlapping_interval_levels():
|
| 173 |
+
# GH 54934
|
| 174 |
+
ivl1 = pd.IntervalIndex.from_breaks([0.0, 1.0, 2.0])
|
| 175 |
+
ivl2 = pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5])
|
| 176 |
+
mi1 = MultiIndex.from_product([ivl1, ivl1])
|
| 177 |
+
mi2 = MultiIndex.from_product([ivl2, ivl2])
|
| 178 |
+
result = mi1.append(mi2)
|
| 179 |
+
expected = MultiIndex.from_tuples(
|
| 180 |
+
[
|
| 181 |
+
(pd.Interval(0.0, 1.0), pd.Interval(0.0, 1.0)),
|
| 182 |
+
(pd.Interval(0.0, 1.0), pd.Interval(1.0, 2.0)),
|
| 183 |
+
(pd.Interval(1.0, 2.0), pd.Interval(0.0, 1.0)),
|
| 184 |
+
(pd.Interval(1.0, 2.0), pd.Interval(1.0, 2.0)),
|
| 185 |
+
(pd.Interval(0.5, 1.5), pd.Interval(0.5, 1.5)),
|
| 186 |
+
(pd.Interval(0.5, 1.5), pd.Interval(1.5, 2.5)),
|
| 187 |
+
(pd.Interval(1.5, 2.5), pd.Interval(0.5, 1.5)),
|
| 188 |
+
(pd.Interval(1.5, 2.5), pd.Interval(1.5, 2.5)),
|
| 189 |
+
]
|
| 190 |
+
)
|
| 191 |
+
tm.assert_index_equal(result, expected)
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def test_repeat():
|
| 195 |
+
reps = 2
|
| 196 |
+
numbers = [1, 2, 3]
|
| 197 |
+
names = np.array(["foo", "bar"])
|
| 198 |
+
|
| 199 |
+
m = MultiIndex.from_product([numbers, names], names=names)
|
| 200 |
+
expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
|
| 201 |
+
tm.assert_index_equal(m.repeat(reps), expected)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def test_insert_base(idx):
|
| 205 |
+
result = idx[1:4]
|
| 206 |
+
|
| 207 |
+
# test 0th element
|
| 208 |
+
assert idx[0:4].equals(result.insert(0, idx[0]))
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def test_delete_base(idx):
|
| 212 |
+
expected = idx[1:]
|
| 213 |
+
result = idx.delete(0)
|
| 214 |
+
assert result.equals(expected)
|
| 215 |
+
assert result.name == expected.name
|
| 216 |
+
|
| 217 |
+
expected = idx[:-1]
|
| 218 |
+
result = idx.delete(-1)
|
| 219 |
+
assert result.equals(expected)
|
| 220 |
+
assert result.name == expected.name
|
| 221 |
+
|
| 222 |
+
msg = "index 6 is out of bounds for axis 0 with size 6"
|
| 223 |
+
with pytest.raises(IndexError, match=msg):
|
| 224 |
+
idx.delete(len(idx))
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_setops.py
ADDED
|
@@ -0,0 +1,772 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pandas import (
|
| 6 |
+
CategoricalIndex,
|
| 7 |
+
DataFrame,
|
| 8 |
+
Index,
|
| 9 |
+
IntervalIndex,
|
| 10 |
+
MultiIndex,
|
| 11 |
+
Series,
|
| 12 |
+
)
|
| 13 |
+
import pandas._testing as tm
|
| 14 |
+
from pandas.api.types import (
|
| 15 |
+
is_float_dtype,
|
| 16 |
+
is_unsigned_integer_dtype,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@pytest.mark.parametrize("case", [0.5, "xxx"])
|
| 21 |
+
@pytest.mark.parametrize(
|
| 22 |
+
"method", ["intersection", "union", "difference", "symmetric_difference"]
|
| 23 |
+
)
|
| 24 |
+
def test_set_ops_error_cases(idx, case, sort, method):
|
| 25 |
+
# non-iterable input
|
| 26 |
+
msg = "Input must be Index or array-like"
|
| 27 |
+
with pytest.raises(TypeError, match=msg):
|
| 28 |
+
getattr(idx, method)(case, sort=sort)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
|
| 32 |
+
def test_intersection_base(idx, sort, klass):
|
| 33 |
+
first = idx[2::-1] # first 3 elements reversed
|
| 34 |
+
second = idx[:5]
|
| 35 |
+
|
| 36 |
+
if klass is not MultiIndex:
|
| 37 |
+
second = klass(second.values)
|
| 38 |
+
|
| 39 |
+
intersect = first.intersection(second, sort=sort)
|
| 40 |
+
if sort is None:
|
| 41 |
+
expected = first.sort_values()
|
| 42 |
+
else:
|
| 43 |
+
expected = first
|
| 44 |
+
tm.assert_index_equal(intersect, expected)
|
| 45 |
+
|
| 46 |
+
msg = "other must be a MultiIndex or a list of tuples"
|
| 47 |
+
with pytest.raises(TypeError, match=msg):
|
| 48 |
+
first.intersection([1, 2, 3], sort=sort)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@pytest.mark.arm_slow
|
| 52 |
+
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
|
| 53 |
+
def test_union_base(idx, sort, klass):
|
| 54 |
+
first = idx[::-1]
|
| 55 |
+
second = idx[:5]
|
| 56 |
+
|
| 57 |
+
if klass is not MultiIndex:
|
| 58 |
+
second = klass(second.values)
|
| 59 |
+
|
| 60 |
+
union = first.union(second, sort=sort)
|
| 61 |
+
if sort is None:
|
| 62 |
+
expected = first.sort_values()
|
| 63 |
+
else:
|
| 64 |
+
expected = first
|
| 65 |
+
tm.assert_index_equal(union, expected)
|
| 66 |
+
|
| 67 |
+
msg = "other must be a MultiIndex or a list of tuples"
|
| 68 |
+
with pytest.raises(TypeError, match=msg):
|
| 69 |
+
first.union([1, 2, 3], sort=sort)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def test_difference_base(idx, sort):
|
| 73 |
+
second = idx[4:]
|
| 74 |
+
answer = idx[:4]
|
| 75 |
+
result = idx.difference(second, sort=sort)
|
| 76 |
+
|
| 77 |
+
if sort is None:
|
| 78 |
+
answer = answer.sort_values()
|
| 79 |
+
|
| 80 |
+
assert result.equals(answer)
|
| 81 |
+
tm.assert_index_equal(result, answer)
|
| 82 |
+
|
| 83 |
+
# GH 10149
|
| 84 |
+
cases = [klass(second.values) for klass in [np.array, Series, list]]
|
| 85 |
+
for case in cases:
|
| 86 |
+
result = idx.difference(case, sort=sort)
|
| 87 |
+
tm.assert_index_equal(result, answer)
|
| 88 |
+
|
| 89 |
+
msg = "other must be a MultiIndex or a list of tuples"
|
| 90 |
+
with pytest.raises(TypeError, match=msg):
|
| 91 |
+
idx.difference([1, 2, 3], sort=sort)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def test_symmetric_difference(idx, sort):
|
| 95 |
+
first = idx[1:]
|
| 96 |
+
second = idx[:-1]
|
| 97 |
+
answer = idx[[-1, 0]]
|
| 98 |
+
result = first.symmetric_difference(second, sort=sort)
|
| 99 |
+
|
| 100 |
+
if sort is None:
|
| 101 |
+
answer = answer.sort_values()
|
| 102 |
+
|
| 103 |
+
tm.assert_index_equal(result, answer)
|
| 104 |
+
|
| 105 |
+
# GH 10149
|
| 106 |
+
cases = [klass(second.values) for klass in [np.array, Series, list]]
|
| 107 |
+
for case in cases:
|
| 108 |
+
result = first.symmetric_difference(case, sort=sort)
|
| 109 |
+
tm.assert_index_equal(result, answer)
|
| 110 |
+
|
| 111 |
+
msg = "other must be a MultiIndex or a list of tuples"
|
| 112 |
+
with pytest.raises(TypeError, match=msg):
|
| 113 |
+
first.symmetric_difference([1, 2, 3], sort=sort)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def test_multiindex_symmetric_difference():
|
| 117 |
+
# GH 13490
|
| 118 |
+
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
|
| 119 |
+
result = idx.symmetric_difference(idx)
|
| 120 |
+
assert result.names == idx.names
|
| 121 |
+
|
| 122 |
+
idx2 = idx.copy().rename(["A", "B"])
|
| 123 |
+
result = idx.symmetric_difference(idx2)
|
| 124 |
+
assert result.names == [None, None]
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def test_empty(idx):
|
| 128 |
+
# GH 15270
|
| 129 |
+
assert not idx.empty
|
| 130 |
+
assert idx[:0].empty
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def test_difference(idx, sort):
|
| 134 |
+
first = idx
|
| 135 |
+
result = first.difference(idx[-3:], sort=sort)
|
| 136 |
+
vals = idx[:-3].values
|
| 137 |
+
|
| 138 |
+
if sort is None:
|
| 139 |
+
vals = sorted(vals)
|
| 140 |
+
|
| 141 |
+
expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names)
|
| 142 |
+
|
| 143 |
+
assert isinstance(result, MultiIndex)
|
| 144 |
+
assert result.equals(expected)
|
| 145 |
+
assert result.names == idx.names
|
| 146 |
+
tm.assert_index_equal(result, expected)
|
| 147 |
+
|
| 148 |
+
# empty difference: reflexive
|
| 149 |
+
result = idx.difference(idx, sort=sort)
|
| 150 |
+
expected = idx[:0]
|
| 151 |
+
assert result.equals(expected)
|
| 152 |
+
assert result.names == idx.names
|
| 153 |
+
|
| 154 |
+
# empty difference: superset
|
| 155 |
+
result = idx[-3:].difference(idx, sort=sort)
|
| 156 |
+
expected = idx[:0]
|
| 157 |
+
assert result.equals(expected)
|
| 158 |
+
assert result.names == idx.names
|
| 159 |
+
|
| 160 |
+
# empty difference: degenerate
|
| 161 |
+
result = idx[:0].difference(idx, sort=sort)
|
| 162 |
+
expected = idx[:0]
|
| 163 |
+
assert result.equals(expected)
|
| 164 |
+
assert result.names == idx.names
|
| 165 |
+
|
| 166 |
+
# names not the same
|
| 167 |
+
chunklet = idx[-3:]
|
| 168 |
+
chunklet.names = ["foo", "baz"]
|
| 169 |
+
result = first.difference(chunklet, sort=sort)
|
| 170 |
+
assert result.names == (None, None)
|
| 171 |
+
|
| 172 |
+
# empty, but non-equal
|
| 173 |
+
result = idx.difference(idx.sortlevel(1)[0], sort=sort)
|
| 174 |
+
assert len(result) == 0
|
| 175 |
+
|
| 176 |
+
# raise Exception called with non-MultiIndex
|
| 177 |
+
result = first.difference(first.values, sort=sort)
|
| 178 |
+
assert result.equals(first[:0])
|
| 179 |
+
|
| 180 |
+
# name from empty array
|
| 181 |
+
result = first.difference([], sort=sort)
|
| 182 |
+
assert first.equals(result)
|
| 183 |
+
assert first.names == result.names
|
| 184 |
+
|
| 185 |
+
# name from non-empty array
|
| 186 |
+
result = first.difference([("foo", "one")], sort=sort)
|
| 187 |
+
expected = MultiIndex.from_tuples(
|
| 188 |
+
[("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
|
| 189 |
+
)
|
| 190 |
+
expected.names = first.names
|
| 191 |
+
assert first.names == result.names
|
| 192 |
+
|
| 193 |
+
msg = "other must be a MultiIndex or a list of tuples"
|
| 194 |
+
with pytest.raises(TypeError, match=msg):
|
| 195 |
+
first.difference([1, 2, 3, 4, 5], sort=sort)
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def test_difference_sort_special():
|
| 199 |
+
# GH-24959
|
| 200 |
+
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
|
| 201 |
+
# sort=None, the default
|
| 202 |
+
result = idx.difference([])
|
| 203 |
+
tm.assert_index_equal(result, idx)
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def test_difference_sort_special_true():
|
| 207 |
+
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
|
| 208 |
+
result = idx.difference([], sort=True)
|
| 209 |
+
expected = MultiIndex.from_product([[0, 1], ["a", "b"]])
|
| 210 |
+
tm.assert_index_equal(result, expected)
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def test_difference_sort_incomparable():
|
| 214 |
+
# GH-24959
|
| 215 |
+
idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]])
|
| 216 |
+
|
| 217 |
+
other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]])
|
| 218 |
+
# sort=None, the default
|
| 219 |
+
msg = "sort order is undefined for incomparable objects"
|
| 220 |
+
with tm.assert_produces_warning(RuntimeWarning, match=msg):
|
| 221 |
+
result = idx.difference(other)
|
| 222 |
+
tm.assert_index_equal(result, idx)
|
| 223 |
+
|
| 224 |
+
# sort=False
|
| 225 |
+
result = idx.difference(other, sort=False)
|
| 226 |
+
tm.assert_index_equal(result, idx)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def test_difference_sort_incomparable_true():
|
| 230 |
+
idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]])
|
| 231 |
+
other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]])
|
| 232 |
+
|
| 233 |
+
# TODO: this is raising in constructing a Categorical when calling
|
| 234 |
+
# algos.safe_sort. Should we catch and re-raise with a better message?
|
| 235 |
+
msg = "'values' is not ordered, please explicitly specify the categories order "
|
| 236 |
+
with pytest.raises(TypeError, match=msg):
|
| 237 |
+
idx.difference(other, sort=True)
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def test_union(idx, sort):
|
| 241 |
+
piece1 = idx[:5][::-1]
|
| 242 |
+
piece2 = idx[3:]
|
| 243 |
+
|
| 244 |
+
the_union = piece1.union(piece2, sort=sort)
|
| 245 |
+
|
| 246 |
+
if sort in (None, False):
|
| 247 |
+
tm.assert_index_equal(the_union.sort_values(), idx.sort_values())
|
| 248 |
+
else:
|
| 249 |
+
tm.assert_index_equal(the_union, idx)
|
| 250 |
+
|
| 251 |
+
# corner case, pass self or empty thing:
|
| 252 |
+
the_union = idx.union(idx, sort=sort)
|
| 253 |
+
tm.assert_index_equal(the_union, idx)
|
| 254 |
+
|
| 255 |
+
the_union = idx.union(idx[:0], sort=sort)
|
| 256 |
+
tm.assert_index_equal(the_union, idx)
|
| 257 |
+
|
| 258 |
+
tuples = idx.values
|
| 259 |
+
result = idx[:4].union(tuples[4:], sort=sort)
|
| 260 |
+
if sort is None:
|
| 261 |
+
tm.assert_index_equal(result.sort_values(), idx.sort_values())
|
| 262 |
+
else:
|
| 263 |
+
assert result.equals(idx)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def test_union_with_regular_index(idx, using_infer_string):
|
| 267 |
+
other = Index(["A", "B", "C"])
|
| 268 |
+
|
| 269 |
+
result = other.union(idx)
|
| 270 |
+
assert ("foo", "one") in result
|
| 271 |
+
assert "B" in result
|
| 272 |
+
|
| 273 |
+
if using_infer_string:
|
| 274 |
+
with pytest.raises(NotImplementedError, match="Can only union"):
|
| 275 |
+
idx.union(other)
|
| 276 |
+
else:
|
| 277 |
+
msg = "The values in the array are unorderable"
|
| 278 |
+
with tm.assert_produces_warning(RuntimeWarning, match=msg):
|
| 279 |
+
result2 = idx.union(other)
|
| 280 |
+
# This is more consistent now, if sorting fails then we don't sort at all
|
| 281 |
+
# in the MultiIndex case.
|
| 282 |
+
assert not result.equals(result2)
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def test_intersection(idx, sort):
|
| 286 |
+
piece1 = idx[:5][::-1]
|
| 287 |
+
piece2 = idx[3:]
|
| 288 |
+
|
| 289 |
+
the_int = piece1.intersection(piece2, sort=sort)
|
| 290 |
+
|
| 291 |
+
if sort in (None, True):
|
| 292 |
+
tm.assert_index_equal(the_int, idx[3:5])
|
| 293 |
+
else:
|
| 294 |
+
tm.assert_index_equal(the_int.sort_values(), idx[3:5])
|
| 295 |
+
|
| 296 |
+
# corner case, pass self
|
| 297 |
+
the_int = idx.intersection(idx, sort=sort)
|
| 298 |
+
tm.assert_index_equal(the_int, idx)
|
| 299 |
+
|
| 300 |
+
# empty intersection: disjoint
|
| 301 |
+
empty = idx[:2].intersection(idx[2:], sort=sort)
|
| 302 |
+
expected = idx[:0]
|
| 303 |
+
assert empty.equals(expected)
|
| 304 |
+
|
| 305 |
+
tuples = idx.values
|
| 306 |
+
result = idx.intersection(tuples)
|
| 307 |
+
assert result.equals(idx)
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
@pytest.mark.parametrize(
|
| 311 |
+
"method", ["intersection", "union", "difference", "symmetric_difference"]
|
| 312 |
+
)
|
| 313 |
+
def test_setop_with_categorical(idx, sort, method):
|
| 314 |
+
other = idx.to_flat_index().astype("category")
|
| 315 |
+
res_names = [None] * idx.nlevels
|
| 316 |
+
|
| 317 |
+
result = getattr(idx, method)(other, sort=sort)
|
| 318 |
+
expected = getattr(idx, method)(idx, sort=sort).rename(res_names)
|
| 319 |
+
tm.assert_index_equal(result, expected)
|
| 320 |
+
|
| 321 |
+
result = getattr(idx, method)(other[:5], sort=sort)
|
| 322 |
+
expected = getattr(idx, method)(idx[:5], sort=sort).rename(res_names)
|
| 323 |
+
tm.assert_index_equal(result, expected)
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
def test_intersection_non_object(idx, sort):
|
| 327 |
+
other = Index(range(3), name="foo")
|
| 328 |
+
|
| 329 |
+
result = idx.intersection(other, sort=sort)
|
| 330 |
+
expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None)
|
| 331 |
+
tm.assert_index_equal(result, expected, exact=True)
|
| 332 |
+
|
| 333 |
+
# if we pass a length-0 ndarray (i.e. no name, we retain our idx.name)
|
| 334 |
+
result = idx.intersection(np.asarray(other)[:0], sort=sort)
|
| 335 |
+
expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names)
|
| 336 |
+
tm.assert_index_equal(result, expected, exact=True)
|
| 337 |
+
|
| 338 |
+
msg = "other must be a MultiIndex or a list of tuples"
|
| 339 |
+
with pytest.raises(TypeError, match=msg):
|
| 340 |
+
# With non-zero length non-index, we try and fail to convert to tuples
|
| 341 |
+
idx.intersection(np.asarray(other), sort=sort)
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
def test_intersect_equal_sort():
|
| 345 |
+
# GH-24959
|
| 346 |
+
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
|
| 347 |
+
tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
|
| 348 |
+
tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
def test_intersect_equal_sort_true():
|
| 352 |
+
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
|
| 353 |
+
expected = MultiIndex.from_product([[0, 1], ["a", "b"]])
|
| 354 |
+
result = idx.intersection(idx, sort=True)
|
| 355 |
+
tm.assert_index_equal(result, expected)
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
|
| 359 |
+
def test_union_sort_other_empty(slice_):
|
| 360 |
+
# https://github.com/pandas-dev/pandas/issues/24959
|
| 361 |
+
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
|
| 362 |
+
|
| 363 |
+
# default, sort=None
|
| 364 |
+
other = idx[slice_]
|
| 365 |
+
tm.assert_index_equal(idx.union(other), idx)
|
| 366 |
+
tm.assert_index_equal(other.union(idx), idx)
|
| 367 |
+
|
| 368 |
+
# sort=False
|
| 369 |
+
tm.assert_index_equal(idx.union(other, sort=False), idx)
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
def test_union_sort_other_empty_sort():
|
| 373 |
+
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
|
| 374 |
+
other = idx[:0]
|
| 375 |
+
result = idx.union(other, sort=True)
|
| 376 |
+
expected = MultiIndex.from_product([[0, 1], ["a", "b"]])
|
| 377 |
+
tm.assert_index_equal(result, expected)
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def test_union_sort_other_incomparable():
|
| 381 |
+
# https://github.com/pandas-dev/pandas/issues/24959
|
| 382 |
+
idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
|
| 383 |
+
|
| 384 |
+
# default, sort=None
|
| 385 |
+
with tm.assert_produces_warning(RuntimeWarning):
|
| 386 |
+
result = idx.union(idx[:1])
|
| 387 |
+
tm.assert_index_equal(result, idx)
|
| 388 |
+
|
| 389 |
+
# sort=False
|
| 390 |
+
result = idx.union(idx[:1], sort=False)
|
| 391 |
+
tm.assert_index_equal(result, idx)
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def test_union_sort_other_incomparable_sort():
|
| 395 |
+
idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
|
| 396 |
+
msg = "'<' not supported between instances of 'Timestamp' and 'int'"
|
| 397 |
+
with pytest.raises(TypeError, match=msg):
|
| 398 |
+
idx.union(idx[:1], sort=True)
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
def test_union_non_object_dtype_raises():
|
| 402 |
+
# GH#32646 raise NotImplementedError instead of less-informative error
|
| 403 |
+
mi = MultiIndex.from_product([["a", "b"], [1, 2]])
|
| 404 |
+
|
| 405 |
+
idx = mi.levels[1]
|
| 406 |
+
|
| 407 |
+
msg = "Can only union MultiIndex with MultiIndex or Index of tuples"
|
| 408 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 409 |
+
mi.union(idx)
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
def test_union_empty_self_different_names():
|
| 413 |
+
# GH#38423
|
| 414 |
+
mi = MultiIndex.from_arrays([[]])
|
| 415 |
+
mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
|
| 416 |
+
result = mi.union(mi2)
|
| 417 |
+
expected = MultiIndex.from_arrays([[1, 2], [3, 4]])
|
| 418 |
+
tm.assert_index_equal(result, expected)
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
def test_union_multiindex_empty_rangeindex():
|
| 422 |
+
# GH#41234
|
| 423 |
+
mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
|
| 424 |
+
ri = pd.RangeIndex(0)
|
| 425 |
+
|
| 426 |
+
result_left = mi.union(ri)
|
| 427 |
+
tm.assert_index_equal(mi, result_left, check_names=False)
|
| 428 |
+
|
| 429 |
+
result_right = ri.union(mi)
|
| 430 |
+
tm.assert_index_equal(mi, result_right, check_names=False)
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
@pytest.mark.parametrize(
|
| 434 |
+
"method", ["union", "intersection", "difference", "symmetric_difference"]
|
| 435 |
+
)
|
| 436 |
+
def test_setops_sort_validation(method):
|
| 437 |
+
idx1 = MultiIndex.from_product([["a", "b"], [1, 2]])
|
| 438 |
+
idx2 = MultiIndex.from_product([["b", "c"], [1, 2]])
|
| 439 |
+
|
| 440 |
+
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
|
| 441 |
+
getattr(idx1, method)(idx2, sort=2)
|
| 442 |
+
|
| 443 |
+
# sort=True is supported as of GH#?
|
| 444 |
+
getattr(idx1, method)(idx2, sort=True)
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
@pytest.mark.parametrize("val", [pd.NA, 100])
|
| 448 |
+
def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val):
|
| 449 |
+
# GH#48606
|
| 450 |
+
midx = MultiIndex.from_arrays(
|
| 451 |
+
[Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
|
| 452 |
+
)
|
| 453 |
+
midx2 = MultiIndex.from_arrays(
|
| 454 |
+
[Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
|
| 455 |
+
)
|
| 456 |
+
result = midx.difference(midx2)
|
| 457 |
+
expected = MultiIndex.from_arrays([Series([1], dtype=any_numeric_ea_dtype), [2]])
|
| 458 |
+
tm.assert_index_equal(result, expected)
|
| 459 |
+
|
| 460 |
+
result = midx.difference(midx.sort_values(ascending=False))
|
| 461 |
+
expected = MultiIndex.from_arrays(
|
| 462 |
+
[Series([], dtype=any_numeric_ea_dtype), Series([], dtype=np.int64)],
|
| 463 |
+
names=["a", None],
|
| 464 |
+
)
|
| 465 |
+
tm.assert_index_equal(result, expected)
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
@pytest.mark.parametrize("val", [pd.NA, 5])
|
| 469 |
+
def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val):
|
| 470 |
+
# GH#48607
|
| 471 |
+
midx = MultiIndex.from_arrays(
|
| 472 |
+
[Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
|
| 473 |
+
)
|
| 474 |
+
midx2 = MultiIndex.from_arrays(
|
| 475 |
+
[Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
|
| 476 |
+
)
|
| 477 |
+
result = midx.symmetric_difference(midx2)
|
| 478 |
+
expected = MultiIndex.from_arrays(
|
| 479 |
+
[Series([1, 1, val], dtype=any_numeric_ea_dtype), [1, 2, 3]]
|
| 480 |
+
)
|
| 481 |
+
tm.assert_index_equal(result, expected)
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
@pytest.mark.parametrize(
|
| 485 |
+
("tuples", "exp_tuples"),
|
| 486 |
+
[
|
| 487 |
+
([("val1", "test1")], [("val1", "test1")]),
|
| 488 |
+
([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]),
|
| 489 |
+
(
|
| 490 |
+
[("val2", "test2"), ("val1", "test1")],
|
| 491 |
+
[("val2", "test2"), ("val1", "test1")],
|
| 492 |
+
),
|
| 493 |
+
],
|
| 494 |
+
)
|
| 495 |
+
def test_intersect_with_duplicates(tuples, exp_tuples):
|
| 496 |
+
# GH#36915
|
| 497 |
+
left = MultiIndex.from_tuples(tuples, names=["first", "second"])
|
| 498 |
+
right = MultiIndex.from_tuples(
|
| 499 |
+
[("val1", "test1"), ("val1", "test1"), ("val2", "test2")],
|
| 500 |
+
names=["first", "second"],
|
| 501 |
+
)
|
| 502 |
+
result = left.intersection(right)
|
| 503 |
+
expected = MultiIndex.from_tuples(exp_tuples, names=["first", "second"])
|
| 504 |
+
tm.assert_index_equal(result, expected)
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
@pytest.mark.parametrize(
|
| 508 |
+
"data, names, expected",
|
| 509 |
+
[
|
| 510 |
+
((1,), None, [None, None]),
|
| 511 |
+
((1,), ["a"], [None, None]),
|
| 512 |
+
((1,), ["b"], [None, None]),
|
| 513 |
+
((1, 2), ["c", "d"], [None, None]),
|
| 514 |
+
((1, 2), ["b", "a"], [None, None]),
|
| 515 |
+
((1, 2, 3), ["a", "b", "c"], [None, None]),
|
| 516 |
+
((1, 2), ["a", "c"], ["a", None]),
|
| 517 |
+
((1, 2), ["c", "b"], [None, "b"]),
|
| 518 |
+
((1, 2), ["a", "b"], ["a", "b"]),
|
| 519 |
+
((1, 2), [None, "b"], [None, "b"]),
|
| 520 |
+
],
|
| 521 |
+
)
|
| 522 |
+
def test_maybe_match_names(data, names, expected):
|
| 523 |
+
# GH#38323
|
| 524 |
+
mi = MultiIndex.from_tuples([], names=["a", "b"])
|
| 525 |
+
mi2 = MultiIndex.from_tuples([data], names=names)
|
| 526 |
+
result = mi._maybe_match_names(mi2)
|
| 527 |
+
assert result == expected
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
def test_intersection_equal_different_names():
|
| 531 |
+
# GH#30302
|
| 532 |
+
mi1 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["c", "b"])
|
| 533 |
+
mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
|
| 534 |
+
|
| 535 |
+
result = mi1.intersection(mi2)
|
| 536 |
+
expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=[None, "b"])
|
| 537 |
+
tm.assert_index_equal(result, expected)
|
| 538 |
+
|
| 539 |
+
|
| 540 |
+
def test_intersection_different_names():
    # GH#38323
    # A named index intersected with an unnamed index of equal values must
    # compare equal to the unnamed operand.
    arrays = [[1], [3]]
    named = MultiIndex.from_arrays(arrays, names=["c", "b"])
    unnamed = MultiIndex.from_arrays(arrays)
    tm.assert_index_equal(named.intersection(unnamed), unnamed)
|
| 546 |
+
|
| 547 |
+
|
| 548 |
+
def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
    # GH#38623
    # The same null object appears in the first level of both operands.
    na = nulls_fixture
    left = MultiIndex.from_arrays([[3, na, 4, na], [1, 2, 4, 2]])
    right = MultiIndex.from_arrays([[3, na, 3], [1, 2, 4]])

    common = left.intersection(right)
    tm.assert_index_equal(common, MultiIndex.from_arrays([[3, na], [1, 2]]))
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
def test_union_with_missing_values_on_both_sides(nulls_fixture):
    # GH#38623
    # Single-level MultiIndexes that both contain the null object.
    na = nulls_fixture
    shorter = MultiIndex.from_arrays([[1, na]])
    longer = MultiIndex.from_arrays([[1, na, 3]])

    combined = shorter.union(longer)
    tm.assert_index_equal(combined, MultiIndex.from_arrays([[1, 3, na]]))
|
| 564 |
+
|
| 565 |
+
|
| 566 |
+
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
@pytest.mark.parametrize("sort", [None, False])
def test_union_nan_got_duplicated(dtype, sort):
    # GH#38977, GH#49010
    def build(first_level, second_level):
        # First level uses the parametrized dtype, second is plain ints.
        return MultiIndex.from_arrays(
            [pd.array(first_level, dtype=dtype), second_level]
        )

    left = build([1.0, np.nan], [2, 3])
    right = build([1.0, np.nan, 3.0], [2, 3, 4])
    result = left.union(right, sort=sort)

    # With sort=None the NaN row is expected last; otherwise the result is
    # expected to equal the right operand as-is.
    expected = build([1.0, 3.0, np.nan], [2, 4, 3]) if sort is None else right
    tm.assert_index_equal(result, expected)
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
@pytest.mark.parametrize("val", [4, 1])
def test_union_keep_ea_dtype(any_numeric_ea_dtype, val):
    # GH#48505
    dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays(
        [Series([val, 2], dtype=dtype), [1, 2]], names=["a", None]
    )
    right = MultiIndex.from_arrays([Series([2, 1], dtype=dtype), [2, 1]])
    result = left.union(right)

    # val == 4 adds a third distinct row; val == 1 makes both operands hold
    # the same two rows.
    if val == 4:
        first_level, second_level = [1, 2, 4], [1, 2, 1]
    else:
        first_level, second_level = [1, 2], [1, 2]
    expected = MultiIndex.from_arrays(
        [Series(first_level, dtype=dtype), second_level]
    )
    tm.assert_index_equal(result, expected)
|
| 600 |
+
|
| 601 |
+
|
| 602 |
+
@pytest.mark.parametrize("dupe_val", [3, pd.NA])
def test_union_with_duplicates_keep_ea_dtype(dupe_val, any_numeric_ea_dtype):
    # GH48900
    def build(values):
        # Two levels, each an independent Series built from the same values.
        return MultiIndex.from_arrays(
            [Series(values, dtype=any_numeric_ea_dtype) for _ in range(2)]
        )

    left = build([1, dupe_val, 2])
    right = build([2, dupe_val, dupe_val])

    result = left.union(right)
    tm.assert_index_equal(result, build([1, 2, dupe_val, dupe_val]))
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_union_duplicates(index, request):
    # GH#38977
    if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
        pytest.skip(f"No duplicates in an empty {type(index).__name__}")

    uniques = index.unique().values.tolist()
    n_unique = len(uniques)
    without_dupes = MultiIndex.from_arrays([uniques, [1] * n_unique])
    # Same rows, with the first one repeated once at the front.
    with_dupes = MultiIndex.from_arrays(
        [uniques[:1] + uniques, [1] * (n_unique + 1)]
    )

    expected = with_dupes.sort_values()
    tm.assert_index_equal(with_dupes.union(without_dupes), expected)

    first_level = with_dupes.levels[0]
    if (
        is_unsigned_integer_dtype(first_level)
        and (with_dupes.get_level_values(0) < 2**63).all()
    ):
        # GH#47294 - union uses lib.fast_zip, converting data to Python integers
        # and loses type information. Result is then unsigned only when values are
        # sufficiently large to require unsigned dtype. This happens only if other
        # has dups or one or both have missing values
        recast = np.int64
    elif is_float_dtype(first_level):
        # with_dupes has duplicates, which is a different path than above.
        # Fix that path to use the correct float dtype?
        recast = float
    else:
        recast = None

    if recast is not None:
        expected = expected.set_levels(
            [expected.levels[0].astype(recast), expected.levels[1]]
        )

    # Same union with the operands swapped.
    tm.assert_index_equal(without_dupes.union(with_dupes), expected)
|
| 660 |
+
|
| 661 |
+
|
| 662 |
+
def test_union_keep_dtype_precision(any_real_numeric_dtype):
    # GH#48498
    dtype = any_real_numeric_dtype
    level_names = ["a", None]
    left = MultiIndex.from_arrays(
        [Series([4, 1, 1], dtype=dtype), [2, 1, 1]], names=level_names
    )
    right = MultiIndex.from_arrays(
        [Series([1, 4], dtype=dtype), [1, 2]], names=level_names
    )

    result = left.union(right)

    # The first level of the expected index is built with the same dtype.
    expected = MultiIndex.from_arrays(
        [Series([1, 1, 4], dtype=dtype), [1, 1, 2]],
        names=level_names,
    )
    tm.assert_index_equal(result, expected)
|
| 675 |
+
|
| 676 |
+
|
| 677 |
+
def test_union_keep_ea_dtype_with_na(any_numeric_ea_dtype):
    # GH#48498
    dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays(
        [Series([4, pd.NA], dtype=dtype), [2, 1]], names=["a", None]
    )
    right = MultiIndex.from_arrays([Series([1, pd.NA], dtype=dtype), [1, 2]])

    result = left.union(right)

    # Both NA rows are expected, placed after the non-missing rows.
    expected = MultiIndex.from_arrays(
        [Series([1, 4, pd.NA, pd.NA], dtype=dtype), [1, 2, 1, 2]]
    )
    tm.assert_index_equal(result, expected)
|
| 688 |
+
|
| 689 |
+
|
| 690 |
+
@pytest.mark.parametrize(
    "levels1, levels2, codes1, codes2, names",
    [
        (
            [["a", "b", "c"], [0, ""]],
            [["c", "d", "b"], [""]],
            [[0, 1, 2], [1, 1, 1]],
            [[0, 1, 2], [0, 0, 0]],
            ["name1", "name2"],
        ),
    ],
)
def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names):
    # GH#25169
    # The intersection of the two indexes must report a lexsort depth of 2.
    left = MultiIndex(levels=levels1, codes=codes1, names=names)
    right = MultiIndex(levels=levels2, codes=codes2, names=names)
    assert left.intersection(right)._lexsort_depth == 2
|
| 708 |
+
|
| 709 |
+
|
| 710 |
+
@pytest.mark.parametrize(
    "a",
    [pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]],
)
@pytest.mark.parametrize(
    "b",
    [
        pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True),
        pd.Categorical(["a", "b"], categories=["b", "a"]),
    ],
)
def test_intersection_with_non_lex_sorted_categories(a, b):
    # GH#49974
    other = ["1", "2"]
    sort_keys = ["x", "y"]

    left = DataFrame({"x": a, "y": other})
    right = DataFrame({"x": b, "y": other})

    expected = MultiIndex.from_arrays([a, other], names=["x", "y"])

    # Every combination of as-given / sorted frames on either side.
    operand_pairs = [
        (left, right.sort_values(sort_keys)),
        (left, right),
        (left.sort_values(sort_keys), right),
        (left.sort_values(sort_keys), right.sort_values(sort_keys)),
    ]
    # Compute all intersections first, then compare each to ``expected``.
    results = [
        MultiIndex.from_frame(lhs).intersection(MultiIndex.from_frame(rhs))
        for lhs, rhs in operand_pairs
    ]
    for result in results:
        tm.assert_index_equal(result, expected)
|
| 745 |
+
|
| 746 |
+
|
| 747 |
+
@pytest.mark.parametrize("val", [pd.NA, 100])
def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):
    # GH#48604
    dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays(
        [Series([1, 2], dtype=dtype), [2, 1]], names=["a", None]
    )
    right = MultiIndex.from_arrays([Series([1, 2, val], dtype=dtype), [1, 1, 3]])

    result = left.intersection(right)

    # Only the row (2, 1) is expected in the result.
    expected = MultiIndex.from_arrays([Series([2], dtype=dtype), [1]])
    tm.assert_index_equal(result, expected)
|
| 759 |
+
|
| 760 |
+
|
| 761 |
+
def test_union_with_na_when_constructing_dataframe():
    # GH43222
    # The first row is indexed by a single all-missing key built from "str"
    # Series; the second row's index is built from tuples.
    missing_key_index = MultiIndex.from_arrays(
        [Series([None], dtype="str") for _ in range(2)]
    )
    first_row = Series((1,), index=missing_key_index)
    second_row = Series(
        (10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b")))
    )

    result = DataFrame([first_row, second_row])

    expected = DataFrame({(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]})
    tm.assert_frame_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_sorting.py
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.errors import (
|
| 5 |
+
PerformanceWarning,
|
| 6 |
+
UnsortedIndexError,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
from pandas import (
|
| 10 |
+
CategoricalIndex,
|
| 11 |
+
DataFrame,
|
| 12 |
+
Index,
|
| 13 |
+
MultiIndex,
|
| 14 |
+
RangeIndex,
|
| 15 |
+
Series,
|
| 16 |
+
Timestamp,
|
| 17 |
+
)
|
| 18 |
+
import pandas._testing as tm
|
| 19 |
+
from pandas.core.indexes.frozen import FrozenList
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def test_sortlevel(idx):
    # Shuffle the fixture's tuples with a fixed seed, then compare
    # ``sortlevel`` on each level against Python's ``sorted``.
    tuples = list(idx)
    np.random.default_rng(2).shuffle(tuples)
    shuffled = MultiIndex.from_tuples(tuples)

    # Level 0 uses the natural tuple order; level 1 orders by the second
    # element first and the first element second.
    sort_keys = ((0, None), (1, lambda item: (item[1], item[0])))
    for level, key in sort_keys:
        expected = MultiIndex.from_tuples(sorted(tuples, key=key))

        ascending = shuffled.sortlevel(level)[0]
        assert ascending.equals(expected)

        descending = shuffled.sortlevel(level, ascending=False)[0]
        assert descending.equals(expected[::-1])
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def test_sortlevel_not_sort_remaining():
    # Level "A" is constant, so with sort_remaining=False the result is
    # expected to equal the input.
    original = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
    reordered = original.sortlevel("A", sort_remaining=False)[0]
    assert reordered.equals(original)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def test_sortlevel_deterministic():
    # Fixed, unsorted input: ``sortlevel`` must agree with ``sorted``.
    tuples = [
        ("bar", "one"),
        ("foo", "two"),
        ("qux", "two"),
        ("foo", "one"),
        ("baz", "two"),
        ("qux", "one"),
    ]
    index = MultiIndex.from_tuples(tuples)

    # Sorting on level 0.
    by_first = MultiIndex.from_tuples(sorted(tuples))
    ascending, _ = index.sortlevel(0)
    assert ascending.equals(by_first)
    descending, _ = index.sortlevel(0, ascending=False)
    assert descending.equals(by_first[::-1])

    # Sorting on level 1: second element first, first element as tie-break.
    by_second = MultiIndex.from_tuples(
        sorted(tuples, key=lambda pair: (pair[1], pair[0]))
    )
    ascending, _ = index.sortlevel(1)
    assert ascending.equals(by_second)
    descending, _ = index.sortlevel(1, ascending=False)
    assert descending.equals(by_second[::-1])
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def test_sortlevel_na_position():
|
| 79 |
+
# GH#51612
|
| 80 |
+
midx = MultiIndex.from_tuples([(1, np.nan), (1, 1)])
|
| 81 |
+
result = midx.sortlevel(level=[0, 1], na_position="last")[0]
|
| 82 |
+
expected = MultiIndex.from_tuples([(1, 1), (1, np.nan)])
|
| 83 |
+
tm.assert_index_equal(result, expected)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def test_numpy_argsort(idx):
    # ``np.argsort`` applied to the index must match the index's own
    # ``argsort`` method.
    result = np.argsort(idx)
    expected = idx.argsort()
    tm.assert_numpy_array_equal(result, expected)

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # The guard checks the instance itself: ``isinstance(type(idx), ...)``
    # inspects the class object and can never be true.
    if isinstance(idx, (CategoricalIndex, RangeIndex)):
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, axis=1)

        msg = "the 'kind' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, kind="mergesort")

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, order=("a", "b"))
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def test_unsortedindex():
    # GH 11897
    keys = [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")]
    unsorted_mi = MultiIndex.from_tuples(keys, names=["one", "two"])
    frame = DataFrame(
        [[row, row * 10] for row in range(len(keys))],
        index=unsorted_mi,
        columns=["one", "two"],
    )

    # GH 16734: not sorted, but a fully specified key involves no real slicing
    tm.assert_series_equal(frame.loc(axis=0)["z", "a"], frame.iloc[0])

    # Slicing on the second level of the unsorted index must raise.
    msg = (
        "MultiIndex slicing requires the index to be lexsorted: "
        r"slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        frame.loc(axis=0)["z", slice("a")]

    # After sorting in place, slicing works and missing keys raise KeyError.
    frame.sort_index(inplace=True)
    z_rows = frame.loc(axis=0)["z", :]
    assert len(z_rows) == 2

    with pytest.raises(KeyError, match="'q'"):
        frame.loc(axis=0)["q", :]
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def test_unsortedindex_doc_examples():
    # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex
    columns = {
        "jim": [0, 0, 1, 1],
        "joe": ["x", "x", "z", "y"],
        "jolie": np.random.default_rng(2).random(4),
    }
    unsorted = DataFrame(columns).set_index(["jim", "joe"])

    # A lookup on the unsorted index must emit a PerformanceWarning ...
    with tm.assert_produces_warning(PerformanceWarning):
        unsorted.loc[(1, "z")]

    # ... and slicing with two-element keys must raise.
    msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)"
    with pytest.raises(UnsortedIndexError, match=msg):
        unsorted.loc[(0, "y"):(1, "z")]

    assert not unsorted.index._is_lexsorted()
    assert unsorted.index._lexsort_depth == 1

    # sort it; the same lookups are then executed without any guard
    ordered = unsorted.sort_index()
    ordered.loc[(1, "z")]
    ordered.loc[(0, "y"):(1, "z")]

    assert ordered.index._is_lexsorted()
    assert ordered.index._lexsort_depth == 2
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def test_reconstruct_sort():
    # starts off lexsorted & monotonic: the same object is expected back
    sorted_mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert sorted_mi.is_monotonic_increasing
    rebuilt = sorted_mi._sort_levels_monotonic()
    assert rebuilt.is_monotonic_increasing
    assert sorted_mi is rebuilt

    assert sorted_mi.equals(rebuilt)
    assert Index(sorted_mi.values).equals(Index(rebuilt.values))

    # cannot convert to lexsorted: the result stays non-monotonic but must
    # still equal the input, both as MultiIndex and as a flat Index of tuples
    non_monotonic_inputs = [
        MultiIndex.from_tuples(
            [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
            names=["one", "two"],
        ),
        MultiIndex(
            levels=[["b", "d", "a"], [1, 2, 3]],
            codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
            names=["col1", "col2"],
        ),
    ]
    for original in non_monotonic_inputs:
        assert not original.is_monotonic_increasing
        rebuilt = original._sort_levels_monotonic()
        assert not rebuilt.is_monotonic_increasing
        assert original.equals(rebuilt)
        assert Index(original.values).equals(Index(rebuilt.values))
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def test_reconstruct_remove_unused():
    # xref to GH 2770
    level_names = ["first", "second"]
    raw = DataFrame(
        [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]],
        columns=["first", "second", "third"],
    )
    indexed = raw.set_index(level_names, drop=False)
    kept = indexed[indexed["first"] != "deleteMe"]

    # removed levels are there: filtering rows leaves the level values as-is
    with_unused = MultiIndex(
        levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
        codes=[[1, 2], [1, 2]],
        names=level_names,
    )
    tm.assert_index_equal(kept.index, with_unused)

    # after pruning, only the used level values remain
    pruned_expected = MultiIndex(
        levels=[["keepMe", "keepMeToo"], [2, 3]],
        codes=[[0, 1], [0, 1]],
        names=level_names,
    )
    pruned = kept.index.remove_unused_levels()
    tm.assert_index_equal(pruned, pruned_expected)

    # idempotent
    pruned_again = pruned.remove_unused_levels()
    tm.assert_index_equal(pruned_again, pruned_expected)
    assert pruned_again.is_(pruned)
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
@pytest.mark.parametrize(
    "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
    # GH16556

    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.default_rng(10)  # seed is arbitrary value that works

    n_rows = 2**16
    # The three draws happen in this order: first, second, third.
    first = rng.integers(0, 2**13, n_rows).astype(first_type)
    second = rng.integers(0, 2**10, n_rows).astype(second_type)
    third = rng.random(n_rows)
    frame = DataFrame({"first": first, "second": second, "third": third})

    summed = frame.groupby(["first", "second"]).sum()
    filtered = summed[summed.third < 0.1]

    result = filtered.index.remove_unused_levels()
    # Each level must shrink, while the index values stay the same.
    for position in (0, 1):
        assert len(result.levels[position]) < len(filtered.index.levels[position])
    assert result.equals(filtered.index)

    expected = filtered.reset_index().set_index(["first", "second"]).index
    tm.assert_index_equal(result, expected)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
    "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
    # GH 18417
    # Code -1 marks missing entries in the first level.
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    original = MultiIndex(levels=[level0, level1], codes=codes)

    pruned = original.remove_unused_levels()
    tm.assert_index_equal(pruned, original)
    for level_values in pruned.levels:
        assert "unused" not in level_values
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def test_argsort(idx):
    # ``MultiIndex.argsort`` must agree with ``argsort`` on the underlying
    # ``.values`` array.
    from_values = idx.values.argsort()
    tm.assert_numpy_array_equal(idx.argsort(), from_values)
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def test_remove_unused_levels_with_nan():
    # GH 37510
    pruned = (
        Index([(1, np.nan), (3, 4)])
        .rename(["id1", "id2"])
        .set_levels(["a", np.nan], level="id1")
        .remove_unused_levels()
    )
    # Levels are compared through their string representation.
    expected_levels = FrozenList([["a", np.nan], [4]])
    assert str(pruned.levels) == str(expected_levels)
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def test_sort_values_nan():
    # GH48495, GH48626
    # Code -1 in the second level marks a missing value.
    levels = [["A", "B", "C"], ["D"]]
    unsorted = MultiIndex(levels=levels, codes=[[1, 0, 2], [-1, -1, 0]])
    expected = MultiIndex(levels=levels, codes=[[0, 1, 2], [-1, -1, 0]])
    tm.assert_index_equal(unsorted.sort_values(), expected)
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def test_sort_values_incomparable():
    # GH48495
    # The first level mixes an int with a Timestamp, so sorting must raise.
    mixed_level = [1, Timestamp("2000-01-01")]
    mi = MultiIndex.from_arrays([mixed_level, [3, 4]])

    match = "'<' not supported between instances of 'Timestamp' and 'int'"
    with pytest.raises(TypeError, match=match):
        mi.sort_values()
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
@pytest.mark.parametrize("na_position", ["first", "last"])
|
| 320 |
+
@pytest.mark.parametrize("dtype", ["float64", "Int64", "Float64"])
|
| 321 |
+
def test_sort_values_with_na_na_position(dtype, na_position):
|
| 322 |
+
# 51612
|
| 323 |
+
arrays = [
|
| 324 |
+
Series([1, 1, 2], dtype=dtype),
|
| 325 |
+
Series([1, None, 3], dtype=dtype),
|
| 326 |
+
]
|
| 327 |
+
index = MultiIndex.from_arrays(arrays)
|
| 328 |
+
result = index.sort_values(na_position=na_position)
|
| 329 |
+
if na_position == "first":
|
| 330 |
+
arrays = [
|
| 331 |
+
Series([1, 1, 2], dtype=dtype),
|
| 332 |
+
Series([None, 1, 3], dtype=dtype),
|
| 333 |
+
]
|
| 334 |
+
else:
|
| 335 |
+
arrays = [
|
| 336 |
+
Series([1, 1, 2], dtype=dtype),
|
| 337 |
+
Series([1, None, 3], dtype=dtype),
|
| 338 |
+
]
|
| 339 |
+
expected = MultiIndex.from_arrays(arrays)
|
| 340 |
+
tm.assert_index_equal(result, expected)
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def test_sort_unnecessary_warning():
|
| 344 |
+
# GH#55386
|
| 345 |
+
midx = MultiIndex.from_tuples([(1.5, 2), (3.5, 3), (0, 1)])
|
| 346 |
+
midx = midx.set_levels([2.5, np.nan, 1], level=0)
|
| 347 |
+
result = midx.sort_values()
|
| 348 |
+
expected = MultiIndex.from_tuples([(1, 3), (2.5, 1), (np.nan, 2)])
|
| 349 |
+
tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_take.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import pandas._testing as tm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def test_take(idx):
    # ``take`` must match positional list indexing.
    positions = [4, 3, 0, 2]
    assert idx.take(positions).equals(idx[positions])

    # GH 10791: accessing ``freq`` must raise AttributeError
    msg = "'MultiIndex' object has no attribute 'freq'"
    with pytest.raises(AttributeError, match=msg):
        idx.freq
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def test_take_invalid_kwargs(idx):
    indices = [1, 2]

    # (expected exception, message pattern, offending keyword argument)
    rejected = [
        (TypeError, r"take\(\) got an unexpected keyword argument 'foo'", {"foo": 2}),
        (ValueError, "the 'out' parameter is not supported", {"out": indices}),
        (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
    ]
    for exc_type, msg, kwargs in rejected:
        with pytest.raises(exc_type, match=msg):
            idx.take(indices, **kwargs)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def test_take_fill_value():
    # GH 12631
    first_day = pd.Timestamp("2011-01-01")
    second_day = pd.Timestamp("2011-01-02")
    level_names = ["str", "dt"]
    idx = pd.MultiIndex.from_product(
        [["A", "B"], [first_day, second_day]], names=level_names
    )

    # Position -1 refers to the last row unless fill_value is given and
    # filling is allowed.
    last_row = ("B", second_day)
    filled_row = (np.nan, pd.NaT)
    cases = [
        ({}, last_row),
        # fill_value
        ({"fill_value": True}, filled_row),
        # allow_fill=False
        ({"allow_fill": False, "fill_value": True}, last_row),
    ]
    for take_kwargs, third_row in cases:
        result = idx.take(np.array([1, 0, -1]), **take_kwargs)
        expected = pd.MultiIndex.from_tuples(
            [("A", second_day), ("A", first_day), third_row], names=level_names
        )
        tm.assert_index_equal(result, expected)

    # With fill_value set, indices below -1 are rejected.
    msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
    for invalid in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, invalid]), fill_value=True)

    # Without fill_value, an out-of-range position raises IndexError.
    msg = "index -5 is out of bounds for( axis 0 with)? size 4"
    with pytest.raises(IndexError, match=msg):
        idx.take(np.array([1, -5]))
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_astype.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Index,
|
| 6 |
+
to_datetime,
|
| 7 |
+
to_timedelta,
|
| 8 |
+
)
|
| 9 |
+
import pandas._testing as tm
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TestAstype:
    """``Index.astype`` starting from float64 (and object) numeric indexes."""

    def test_astype_float64_to_uint64(self):
        # GH#45309 used to incorrectly return Index with int64 dtype
        non_negative = Index([0.0, 5.0, 10.0, 15.0, 20.0], dtype=np.float64)
        tm.assert_index_equal(
            non_negative.astype("u8"),
            Index([0, 5, 10, 15, 20], dtype=np.uint64),
            exact=True,
        )

        # subtracting 10 produces negative entries, which must be rejected
        shifted = non_negative - 10
        with pytest.raises(ValueError, match="losslessly"):
            shifted.astype(np.uint64)

    def test_astype_float64_to_object(self):
        floats = Index([0.0, 2.5, 5.0, 7.5, 10.0], dtype=np.float64)
        as_object = floats.astype(object)

        # equality has to hold in both directions
        assert as_object.equals(floats)
        assert floats.equals(as_object)
        assert isinstance(as_object, Index)
        assert as_object.dtype == object

    def test_astype_float64_mixed_to_object(self):
        # mixed int-float
        mixed = Index([1.5, 2, 3, 4, 5], dtype=np.float64)
        mixed.name = "foo"
        as_object = mixed.astype(object)

        assert as_object.equals(mixed)
        assert mixed.equals(as_object)
        assert isinstance(as_object, Index)
        assert as_object.dtype == object

    @pytest.mark.parametrize("dtype", ["int16", "int32", "int64"])
    def test_astype_float64_to_int_dtype(self, dtype):
        # GH#12881
        # a float astype int: whole-valued and fractional inputs are both
        # expected to yield [0, 1, 2] in the requested integer dtype
        for values in ([0, 1, 2], [0, 1.1, 2]):
            source = Index(values, dtype=np.float64)
            got = source.astype(dtype)
            want = Index([0, 1, 2], dtype=dtype)
            tm.assert_index_equal(got, want, exact=True)

    @pytest.mark.parametrize("dtype", ["float32", "float64"])
    def test_astype_float64_to_float_dtype(self, dtype):
        # GH#12881
        # a float astype float: result stays an Index with the requested dtype
        converted = Index([0, 1, 2], dtype=np.float64).astype(dtype)
        assert isinstance(converted, Index)
        assert converted.dtype == dtype

    @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
    def test_astype_float_to_datetimelike(self, dtype):
        # GH#49660 pre-2.0 Index.astype from floating to M8/m8/Period raised,
        # inconsistent with Series.astype
        idx = Index([0, 1.1, 2], dtype=np.float64)

        result = idx.astype(dtype)
        # "M8" -> datetime64, otherwise ("m8") -> timedelta64
        converter = to_datetime if dtype[0] == "M" else to_timedelta
        expected = converter(idx.values)
        tm.assert_index_equal(result, expected)

        # check that we match Series behavior
        ser_result = idx.to_series().set_axis(range(3)).astype(dtype)
        ser_expected = expected.to_series().set_axis(range(3))
        tm.assert_series_equal(ser_result, ser_expected)

    @pytest.mark.parametrize("dtype", [int, "int16", "int32", "int64"])
    @pytest.mark.parametrize("non_finite", [np.inf, np.nan])
    def test_cannot_cast_inf_to_int(self, non_finite, dtype):
        # GH#13149
        with_non_finite = Index([1, 2, non_finite], dtype=np.float64)

        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=msg):
            with_non_finite.astype(dtype)

    def test_astype_from_object(self):
        values = [1.0, np.nan, 0.2]
        result = Index(values, dtype="object").astype(float)
        expected = Index(values, dtype=np.float64)

        assert result.dtype == expected.dtype
        tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_indexing.py
ADDED
|
@@ -0,0 +1,611 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.errors import InvalidIndexError
|
| 5 |
+
|
| 6 |
+
from pandas import (
|
| 7 |
+
NA,
|
| 8 |
+
Index,
|
| 9 |
+
RangeIndex,
|
| 10 |
+
Series,
|
| 11 |
+
Timestamp,
|
| 12 |
+
)
|
| 13 |
+
import pandas._testing as tm
|
| 14 |
+
from pandas.core.arrays import (
|
| 15 |
+
ArrowExtensionArray,
|
| 16 |
+
FloatingArray,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@pytest.fixture
def index_large():
    """Return a uint64 ``Index`` whose five values are all ``2**63`` or larger."""
    # large values used in Index[uint64] tests where no compat needed with Int64/Float64
    base = 2**63
    return Index([base + offset for offset in (0, 10, 15, 20, 25)], dtype=np.uint64)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class TestGetLoc:
    """``Index.get_loc`` on numeric indexes: hits, misses and invalid keys."""

    def test_get_loc(self):
        assert Index([0, 1, 2]).get_loc(1) == 1

    def test_get_loc_raises_bad_label(self):
        int_index = Index([0, 1, 2])
        # a list key is rejected as an invalid index, not as a missing key
        with pytest.raises(InvalidIndexError, match=r"\[1, 2\]"):
            int_index.get_loc([1, 2])

    def test_get_loc_float64(self):
        float_index = Index([0.0, 1.0, 2.0], dtype=np.float64)

        # (absent key, exact text expected in the KeyError)
        absent = [
            ("foo", "^'foo'$"),
            (1.5, r"^1\.5$"),
            (True, "^True$"),
            (False, "^False$"),
        ]
        for key, pattern in absent:
            with pytest.raises(KeyError, match=pattern):
                float_index.get_loc(key)

    def test_get_loc_na(self):
        one_nan = Index([np.nan, 1, 2], dtype=np.float64)
        assert one_nan.get_loc(1) == 1
        assert one_nan.get_loc(np.nan) == 0

        two_nans = Index([np.nan, 1, np.nan], dtype=np.float64)
        assert two_nans.get_loc(1) == 1

        # representable by slice [0:2:2]
        msg = "'Cannot get left slice bound for non-unique label: nan'"
        with pytest.raises(KeyError, match=msg):
            two_nans.slice_locs(np.nan)

        # not representable by slice
        three_nans = Index([np.nan, 1, np.nan, np.nan], dtype=np.float64)
        assert three_nans.get_loc(1) == 1
        msg = "'Cannot get left slice bound for non-unique label: nan"
        with pytest.raises(KeyError, match=msg):
            three_nans.slice_locs(np.nan)

    def test_get_loc_missing_nan(self):
        # GH#8569
        no_nan = Index([1, 2], dtype=np.float64)
        assert no_nan.get_loc(1) == 0

        for key, pattern in ((3, r"^3$"), (np.nan, "^nan$")):
            with pytest.raises(KeyError, match=pattern):
                no_nan.get_loc(key)

        # listlike/non-hashable raises InvalidIndexError
        with pytest.raises(InvalidIndexError, match=r"\[nan\]"):
            no_nan.get_loc([np.nan])

    @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]])
    def test_get_loc_float_index_nan_with_method(self, vals):
        # GH#39382
        single = Index(vals)
        with pytest.raises(KeyError, match="nan"):
            single.get_loc(np.nan)

    @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
    def test_get_loc_numericindex_none_raises(self, dtype):
        # case that goes through searchsorted and key is non-comparable to values
        big = Index(np.arange(10**7, dtype=dtype))
        with pytest.raises(KeyError, match="None"):
            big.get_loc(None)

    def test_get_loc_overflows(self):
        # unique but non-monotonic goes through IndexEngine.mapping.get_item
        shuffled = Index([0, 2, 1])

        # one past the largest int64
        val = np.iinfo(np.int64).max + 1
        pattern = str(val)

        with pytest.raises(KeyError, match=pattern):
            shuffled.get_loc(val)
        with pytest.raises(KeyError, match=pattern):
            shuffled._engine.get_loc(val)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class TestGetIndexer:
|
| 108 |
+
def test_get_indexer(self):
|
| 109 |
+
index1 = Index([1, 2, 3, 4, 5])
|
| 110 |
+
index2 = Index([2, 4, 6])
|
| 111 |
+
|
| 112 |
+
r1 = index1.get_indexer(index2)
|
| 113 |
+
e1 = np.array([1, 3, -1], dtype=np.intp)
|
| 114 |
+
tm.assert_almost_equal(r1, e1)
|
| 115 |
+
|
| 116 |
+
@pytest.mark.parametrize("reverse", [True, False])
|
| 117 |
+
@pytest.mark.parametrize(
|
| 118 |
+
"expected,method",
|
| 119 |
+
[
|
| 120 |
+
(np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"),
|
| 121 |
+
(np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"),
|
| 122 |
+
(np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"),
|
| 123 |
+
(np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"),
|
| 124 |
+
],
|
| 125 |
+
)
|
| 126 |
+
def test_get_indexer_methods(self, reverse, expected, method):
|
| 127 |
+
index1 = Index([1, 2, 3, 4, 5])
|
| 128 |
+
index2 = Index([2, 4, 6])
|
| 129 |
+
|
| 130 |
+
if reverse:
|
| 131 |
+
index1 = index1[::-1]
|
| 132 |
+
expected = expected[::-1]
|
| 133 |
+
|
| 134 |
+
result = index2.get_indexer(index1, method=method)
|
| 135 |
+
tm.assert_almost_equal(result, expected)
|
| 136 |
+
|
| 137 |
+
def test_get_indexer_invalid(self):
|
| 138 |
+
# GH10411
|
| 139 |
+
index = Index(np.arange(10))
|
| 140 |
+
|
| 141 |
+
with pytest.raises(ValueError, match="tolerance argument"):
|
| 142 |
+
index.get_indexer([1, 0], tolerance=1)
|
| 143 |
+
|
| 144 |
+
with pytest.raises(ValueError, match="limit argument"):
|
| 145 |
+
index.get_indexer([1, 0], limit=1)
|
| 146 |
+
|
| 147 |
+
@pytest.mark.parametrize(
|
| 148 |
+
"method, tolerance, indexer, expected",
|
| 149 |
+
[
|
| 150 |
+
("pad", None, [0, 5, 9], [0, 5, 9]),
|
| 151 |
+
("backfill", None, [0, 5, 9], [0, 5, 9]),
|
| 152 |
+
("nearest", None, [0, 5, 9], [0, 5, 9]),
|
| 153 |
+
("pad", 0, [0, 5, 9], [0, 5, 9]),
|
| 154 |
+
("backfill", 0, [0, 5, 9], [0, 5, 9]),
|
| 155 |
+
("nearest", 0, [0, 5, 9], [0, 5, 9]),
|
| 156 |
+
("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]),
|
| 157 |
+
("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]),
|
| 158 |
+
("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]),
|
| 159 |
+
("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]),
|
| 160 |
+
("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]),
|
| 161 |
+
("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]),
|
| 162 |
+
("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]),
|
| 163 |
+
("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]),
|
| 164 |
+
("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]),
|
| 165 |
+
],
|
| 166 |
+
)
|
| 167 |
+
def test_get_indexer_nearest(self, method, tolerance, indexer, expected):
|
| 168 |
+
index = Index(np.arange(10))
|
| 169 |
+
|
| 170 |
+
actual = index.get_indexer(indexer, method=method, tolerance=tolerance)
|
| 171 |
+
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
|
| 172 |
+
|
| 173 |
+
@pytest.mark.parametrize("listtype", [list, tuple, Series, np.array])
|
| 174 |
+
@pytest.mark.parametrize(
|
| 175 |
+
"tolerance, expected",
|
| 176 |
+
list(
|
| 177 |
+
zip(
|
| 178 |
+
[[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]],
|
| 179 |
+
[[0, 2, -1], [0, -1, -1], [-1, 2, 9]],
|
| 180 |
+
)
|
| 181 |
+
),
|
| 182 |
+
)
|
| 183 |
+
def test_get_indexer_nearest_listlike_tolerance(
|
| 184 |
+
self, tolerance, expected, listtype
|
| 185 |
+
):
|
| 186 |
+
index = Index(np.arange(10))
|
| 187 |
+
|
| 188 |
+
actual = index.get_indexer(
|
| 189 |
+
[0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance)
|
| 190 |
+
)
|
| 191 |
+
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
|
| 192 |
+
|
| 193 |
+
def test_get_indexer_nearest_error(self):
|
| 194 |
+
index = Index(np.arange(10))
|
| 195 |
+
with pytest.raises(ValueError, match="limit argument"):
|
| 196 |
+
index.get_indexer([1, 0], method="nearest", limit=1)
|
| 197 |
+
|
| 198 |
+
with pytest.raises(ValueError, match="tolerance size must match"):
|
| 199 |
+
index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3])
|
| 200 |
+
|
| 201 |
+
@pytest.mark.parametrize(
|
| 202 |
+
"method,expected",
|
| 203 |
+
[("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])],
|
| 204 |
+
)
|
| 205 |
+
def test_get_indexer_nearest_decreasing(self, method, expected):
|
| 206 |
+
index = Index(np.arange(10))[::-1]
|
| 207 |
+
|
| 208 |
+
actual = index.get_indexer([0, 5, 9], method=method)
|
| 209 |
+
tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp))
|
| 210 |
+
|
| 211 |
+
actual = index.get_indexer([0.2, 1.8, 8.5], method=method)
|
| 212 |
+
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
|
| 213 |
+
|
| 214 |
+
@pytest.mark.parametrize("idx_dtype", ["int64", "float64", "uint64", "range"])
|
| 215 |
+
@pytest.mark.parametrize("method", ["get_indexer", "get_indexer_non_unique"])
|
| 216 |
+
def test_get_indexer_numeric_index_boolean_target(self, method, idx_dtype):
|
| 217 |
+
# GH 16877
|
| 218 |
+
|
| 219 |
+
if idx_dtype == "range":
|
| 220 |
+
numeric_index = RangeIndex(4)
|
| 221 |
+
else:
|
| 222 |
+
numeric_index = Index(np.arange(4, dtype=idx_dtype))
|
| 223 |
+
|
| 224 |
+
other = Index([True, False, True])
|
| 225 |
+
|
| 226 |
+
result = getattr(numeric_index, method)(other)
|
| 227 |
+
expected = np.array([-1, -1, -1], dtype=np.intp)
|
| 228 |
+
if method == "get_indexer":
|
| 229 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 230 |
+
else:
|
| 231 |
+
missing = np.arange(3, dtype=np.intp)
|
| 232 |
+
tm.assert_numpy_array_equal(result[0], expected)
|
| 233 |
+
tm.assert_numpy_array_equal(result[1], missing)
|
| 234 |
+
|
| 235 |
+
@pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
|
| 236 |
+
def test_get_indexer_with_method_numeric_vs_bool(self, method):
|
| 237 |
+
left = Index([1, 2, 3])
|
| 238 |
+
right = Index([True, False])
|
| 239 |
+
|
| 240 |
+
with pytest.raises(TypeError, match="Cannot compare"):
|
| 241 |
+
left.get_indexer(right, method=method)
|
| 242 |
+
|
| 243 |
+
with pytest.raises(TypeError, match="Cannot compare"):
|
| 244 |
+
right.get_indexer(left, method=method)
|
| 245 |
+
|
| 246 |
+
def test_get_indexer_numeric_vs_bool(self):
|
| 247 |
+
left = Index([1, 2, 3])
|
| 248 |
+
right = Index([True, False])
|
| 249 |
+
|
| 250 |
+
res = left.get_indexer(right)
|
| 251 |
+
expected = -1 * np.ones(len(right), dtype=np.intp)
|
| 252 |
+
tm.assert_numpy_array_equal(res, expected)
|
| 253 |
+
|
| 254 |
+
res = right.get_indexer(left)
|
| 255 |
+
expected = -1 * np.ones(len(left), dtype=np.intp)
|
| 256 |
+
tm.assert_numpy_array_equal(res, expected)
|
| 257 |
+
|
| 258 |
+
res = left.get_indexer_non_unique(right)[0]
|
| 259 |
+
expected = -1 * np.ones(len(right), dtype=np.intp)
|
| 260 |
+
tm.assert_numpy_array_equal(res, expected)
|
| 261 |
+
|
| 262 |
+
res = right.get_indexer_non_unique(left)[0]
|
| 263 |
+
expected = -1 * np.ones(len(left), dtype=np.intp)
|
| 264 |
+
tm.assert_numpy_array_equal(res, expected)
|
| 265 |
+
|
| 266 |
+
def test_get_indexer_float64(self):
|
| 267 |
+
idx = Index([0.0, 1.0, 2.0], dtype=np.float64)
|
| 268 |
+
tm.assert_numpy_array_equal(
|
| 269 |
+
idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
|
| 270 |
+
)
|
| 271 |
+
|
| 272 |
+
target = [-0.1, 0.5, 1.1]
|
| 273 |
+
tm.assert_numpy_array_equal(
|
| 274 |
+
idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
|
| 275 |
+
)
|
| 276 |
+
tm.assert_numpy_array_equal(
|
| 277 |
+
idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
|
| 278 |
+
)
|
| 279 |
+
tm.assert_numpy_array_equal(
|
| 280 |
+
idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
def test_get_indexer_nan(self):
|
| 284 |
+
# GH#7820
|
| 285 |
+
result = Index([1, 2, np.nan], dtype=np.float64).get_indexer([np.nan])
|
| 286 |
+
expected = np.array([2], dtype=np.intp)
|
| 287 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 288 |
+
|
| 289 |
+
def test_get_indexer_int64(self):
|
| 290 |
+
index = Index(range(0, 20, 2), dtype=np.int64)
|
| 291 |
+
target = Index(np.arange(10), dtype=np.int64)
|
| 292 |
+
indexer = index.get_indexer(target)
|
| 293 |
+
expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
|
| 294 |
+
tm.assert_numpy_array_equal(indexer, expected)
|
| 295 |
+
|
| 296 |
+
target = Index(np.arange(10), dtype=np.int64)
|
| 297 |
+
indexer = index.get_indexer(target, method="pad")
|
| 298 |
+
expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp)
|
| 299 |
+
tm.assert_numpy_array_equal(indexer, expected)
|
| 300 |
+
|
| 301 |
+
target = Index(np.arange(10), dtype=np.int64)
|
| 302 |
+
indexer = index.get_indexer(target, method="backfill")
|
| 303 |
+
expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
|
| 304 |
+
tm.assert_numpy_array_equal(indexer, expected)
|
| 305 |
+
|
| 306 |
+
def test_get_indexer_uint64(self, index_large):
|
| 307 |
+
target = Index(np.arange(10).astype("uint64") * 5 + 2**63)
|
| 308 |
+
indexer = index_large.get_indexer(target)
|
| 309 |
+
expected = np.array([0, -1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp)
|
| 310 |
+
tm.assert_numpy_array_equal(indexer, expected)
|
| 311 |
+
|
| 312 |
+
target = Index(np.arange(10).astype("uint64") * 5 + 2**63)
|
| 313 |
+
indexer = index_large.get_indexer(target, method="pad")
|
| 314 |
+
expected = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4], dtype=np.intp)
|
| 315 |
+
tm.assert_numpy_array_equal(indexer, expected)
|
| 316 |
+
|
| 317 |
+
target = Index(np.arange(10).astype("uint64") * 5 + 2**63)
|
| 318 |
+
indexer = index_large.get_indexer(target, method="backfill")
|
| 319 |
+
expected = np.array([0, 1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp)
|
| 320 |
+
tm.assert_numpy_array_equal(indexer, expected)
|
| 321 |
+
|
| 322 |
+
@pytest.mark.parametrize("val, val2", [(4, 5), (4, 4), (4, NA), (NA, NA)])
|
| 323 |
+
def test_get_loc_masked(self, val, val2, any_numeric_ea_and_arrow_dtype):
|
| 324 |
+
# GH#39133
|
| 325 |
+
idx = Index([1, 2, 3, val, val2], dtype=any_numeric_ea_and_arrow_dtype)
|
| 326 |
+
result = idx.get_loc(2)
|
| 327 |
+
assert result == 1
|
| 328 |
+
|
| 329 |
+
with pytest.raises(KeyError, match="9"):
|
| 330 |
+
idx.get_loc(9)
|
| 331 |
+
|
| 332 |
+
def test_get_loc_masked_na(self, any_numeric_ea_and_arrow_dtype):
|
| 333 |
+
# GH#39133
|
| 334 |
+
idx = Index([1, 2, NA], dtype=any_numeric_ea_and_arrow_dtype)
|
| 335 |
+
result = idx.get_loc(NA)
|
| 336 |
+
assert result == 2
|
| 337 |
+
|
| 338 |
+
idx = Index([1, 2, NA, NA], dtype=any_numeric_ea_and_arrow_dtype)
|
| 339 |
+
result = idx.get_loc(NA)
|
| 340 |
+
tm.assert_numpy_array_equal(result, np.array([False, False, True, True]))
|
| 341 |
+
|
| 342 |
+
idx = Index([1, 2, 3], dtype=any_numeric_ea_and_arrow_dtype)
|
| 343 |
+
with pytest.raises(KeyError, match="NA"):
|
| 344 |
+
idx.get_loc(NA)
|
| 345 |
+
|
| 346 |
+
def test_get_loc_masked_na_and_nan(self):
|
| 347 |
+
# GH#39133
|
| 348 |
+
idx = Index(
|
| 349 |
+
FloatingArray(
|
| 350 |
+
np.array([1, 2, 1, np.nan]), mask=np.array([False, False, True, False])
|
| 351 |
+
)
|
| 352 |
+
)
|
| 353 |
+
result = idx.get_loc(NA)
|
| 354 |
+
assert result == 2
|
| 355 |
+
result = idx.get_loc(np.nan)
|
| 356 |
+
assert result == 3
|
| 357 |
+
|
| 358 |
+
idx = Index(
|
| 359 |
+
FloatingArray(np.array([1, 2, 1.0]), mask=np.array([False, False, True]))
|
| 360 |
+
)
|
| 361 |
+
result = idx.get_loc(NA)
|
| 362 |
+
assert result == 2
|
| 363 |
+
with pytest.raises(KeyError, match="nan"):
|
| 364 |
+
idx.get_loc(np.nan)
|
| 365 |
+
|
| 366 |
+
idx = Index(
|
| 367 |
+
FloatingArray(
|
| 368 |
+
np.array([1, 2, np.nan]), mask=np.array([False, False, False])
|
| 369 |
+
)
|
| 370 |
+
)
|
| 371 |
+
result = idx.get_loc(np.nan)
|
| 372 |
+
assert result == 2
|
| 373 |
+
with pytest.raises(KeyError, match="NA"):
|
| 374 |
+
idx.get_loc(NA)
|
| 375 |
+
|
| 376 |
+
@pytest.mark.parametrize("val", [4, 2])
|
| 377 |
+
def test_get_indexer_masked_na(self, any_numeric_ea_and_arrow_dtype, val):
|
| 378 |
+
# GH#39133
|
| 379 |
+
idx = Index([1, 2, NA, 3, val], dtype=any_numeric_ea_and_arrow_dtype)
|
| 380 |
+
result = idx.get_indexer_for([1, NA, 5])
|
| 381 |
+
expected = np.array([0, 2, -1])
|
| 382 |
+
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
| 383 |
+
|
| 384 |
+
@pytest.mark.parametrize("dtype", ["boolean", "bool[pyarrow]"])
|
| 385 |
+
def test_get_indexer_masked_na_boolean(self, dtype):
|
| 386 |
+
# GH#39133
|
| 387 |
+
if dtype == "bool[pyarrow]":
|
| 388 |
+
pytest.importorskip("pyarrow")
|
| 389 |
+
idx = Index([True, False, NA], dtype=dtype)
|
| 390 |
+
result = idx.get_loc(False)
|
| 391 |
+
assert result == 1
|
| 392 |
+
result = idx.get_loc(NA)
|
| 393 |
+
assert result == 2
|
| 394 |
+
|
| 395 |
+
def test_get_indexer_arrow_dictionary_target(self):
|
| 396 |
+
pa = pytest.importorskip("pyarrow")
|
| 397 |
+
target = Index(
|
| 398 |
+
ArrowExtensionArray(
|
| 399 |
+
pa.array([1, 2], type=pa.dictionary(pa.int8(), pa.int8()))
|
| 400 |
+
)
|
| 401 |
+
)
|
| 402 |
+
idx = Index([1])
|
| 403 |
+
|
| 404 |
+
result = idx.get_indexer(target)
|
| 405 |
+
expected = np.array([0, -1], dtype=np.int64)
|
| 406 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 407 |
+
|
| 408 |
+
result_1, result_2 = idx.get_indexer_non_unique(target)
|
| 409 |
+
expected_1, expected_2 = np.array([0, -1], dtype=np.int64), np.array(
|
| 410 |
+
[1], dtype=np.int64
|
| 411 |
+
)
|
| 412 |
+
tm.assert_numpy_array_equal(result_1, expected_1)
|
| 413 |
+
tm.assert_numpy_array_equal(result_2, expected_2)
|
| 414 |
+
|
| 415 |
+
|
| 416 |
+
class TestWhere:
|
| 417 |
+
@pytest.mark.parametrize(
|
| 418 |
+
"index",
|
| 419 |
+
[
|
| 420 |
+
Index(np.arange(5, dtype="float64")),
|
| 421 |
+
Index(range(0, 20, 2), dtype=np.int64),
|
| 422 |
+
Index(np.arange(5, dtype="uint64")),
|
| 423 |
+
],
|
| 424 |
+
)
|
| 425 |
+
def test_where(self, listlike_box, index):
|
| 426 |
+
cond = [True] * len(index)
|
| 427 |
+
expected = index
|
| 428 |
+
result = index.where(listlike_box(cond))
|
| 429 |
+
|
| 430 |
+
cond = [False] + [True] * (len(index) - 1)
|
| 431 |
+
expected = Index([index._na_value] + index[1:].tolist(), dtype=np.float64)
|
| 432 |
+
result = index.where(listlike_box(cond))
|
| 433 |
+
tm.assert_index_equal(result, expected)
|
| 434 |
+
|
| 435 |
+
def test_where_uint64(self):
|
| 436 |
+
idx = Index([0, 6, 2], dtype=np.uint64)
|
| 437 |
+
mask = np.array([False, True, False])
|
| 438 |
+
other = np.array([1], dtype=np.int64)
|
| 439 |
+
|
| 440 |
+
expected = Index([1, 6, 1], dtype=np.uint64)
|
| 441 |
+
|
| 442 |
+
result = idx.where(mask, other)
|
| 443 |
+
tm.assert_index_equal(result, expected)
|
| 444 |
+
|
| 445 |
+
result = idx.putmask(~mask, other)
|
| 446 |
+
tm.assert_index_equal(result, expected)
|
| 447 |
+
|
| 448 |
+
def test_where_infers_type_instead_of_trying_to_convert_string_to_float(self):
|
| 449 |
+
# GH 32413
|
| 450 |
+
index = Index([1, np.nan])
|
| 451 |
+
cond = index.notna()
|
| 452 |
+
other = Index(["a", "b"], dtype="string")
|
| 453 |
+
|
| 454 |
+
expected = Index([1.0, "b"])
|
| 455 |
+
result = index.where(cond, other)
|
| 456 |
+
|
| 457 |
+
tm.assert_index_equal(result, expected)
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
class TestTake:
    """``Index.take`` on numeric indexes, with and without ``fill_value``."""

    @pytest.mark.parametrize("idx_dtype", [np.float64, np.int64, np.uint64])
    def test_take_preserve_name(self, idx_dtype):
        named = Index([1, 2, 3, 4], dtype=idx_dtype, name="foo")
        picked = named.take([3, 0, 1])
        assert named.name == picked.name

    def test_take_fill_value_float64(self):
        # GH 12631
        idx = Index([1.0, 2.0, 3.0], name="xxx", dtype=np.float64)

        # without fill_value, -1 is an ordinary "last element" position
        wrapped = Index([2.0, 1.0, 3.0], dtype=np.float64, name="xxx")
        tm.assert_index_equal(idx.take(np.array([1, 0, -1])), wrapped)

        # fill_value
        filled = Index([2.0, 1.0, np.nan], dtype=np.float64, name="xxx")
        tm.assert_index_equal(
            idx.take(np.array([1, 0, -1]), fill_value=True), filled
        )

        # allow_fill=False
        unfilled = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        tm.assert_index_equal(
            unfilled, Index([2.0, 1.0, 3.0], dtype=np.float64, name="xxx")
        )

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    @pytest.mark.parametrize("dtype", [np.int64, np.uint64])
    def test_take_fill_value_ints(self, dtype):
        # see gh-12631
        idx = Index([1, 2, 3], dtype=dtype, name="xxx")
        tm.assert_index_equal(
            idx.take(np.array([1, 0, -1])), Index([2, 1, 3], dtype=dtype, name="xxx")
        )

        name = type(idx).__name__
        msg = f"Unable to fill values because {name} cannot contain NA"

        # fill_value=True
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -1]), fill_value=True)

        # allow_fill=False
        unfilled = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        tm.assert_index_equal(unfilled, Index([2, 1, 3], dtype=dtype, name="xxx"))

        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))
|
| 525 |
+
|
| 526 |
+
|
| 527 |
+
class TestContains:
    """Membership (``in``) checks against numeric indexes."""

    @pytest.mark.parametrize("dtype", [np.float64, np.int64, np.uint64])
    def test_contains_none(self, dtype):
        # GH#35788 should return False, not raise TypeError
        numeric = Index([0, 1, 2, 3, 4], dtype=dtype)
        assert not (None in numeric)

    def test_contains_float64_nans(self):
        with_nan = Index([1.0, 2.0, np.nan], dtype=np.float64)
        assert np.nan in with_nan

    def test_contains_float64_not_nans(self):
        with_nan = Index([1.0, 2.0, np.nan], dtype=np.float64)
        assert 1.0 in with_nan
|
| 541 |
+
|
| 542 |
+
|
| 543 |
+
class TestSliceLocs:
    """``Index.slice_locs`` on int and float indexes, including NaN labels."""

    @pytest.mark.parametrize("dtype", [int, float])
    def test_slice_locs(self, dtype):
        index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
        n = len(index)

        assert index.slice_locs(start=2) == (2, n)
        assert index.slice_locs(start=3) == (3, n)
        assert index.slice_locs(3, 8) == (3, 6)
        assert index.slice_locs(5, 10) == (3, n)
        assert index.slice_locs(end=8) == (0, 6)
        assert index.slice_locs(end=9) == (0, 7)

        # reversed
        index2 = index[::-1]
        assert index2.slice_locs(8, 2) == (2, 6)
        assert index2.slice_locs(7, 3) == (2, 5)

    @pytest.mark.parametrize("dtype", [int, float])
    def test_slice_locs_float_locs(self, dtype):
        # float bounds, including ones that fall between labels
        index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
        n = len(index)
        assert index.slice_locs(5.0, 10.0) == (3, n)
        assert index.slice_locs(4.5, 10.5) == (3, 8)

        index2 = index[::-1]
        assert index2.slice_locs(8.5, 1.5) == (2, 6)
        assert index2.slice_locs(10.5, -1) == (0, n)

    @pytest.mark.parametrize("dtype", [int, float])
    def test_slice_locs_dup_numeric(self, dtype):
        # duplicated label 12: the slice must cover both occurrences
        index = Index(np.array([10, 12, 12, 14], dtype=dtype))
        assert index.slice_locs(12, 12) == (1, 3)
        assert index.slice_locs(11, 13) == (1, 3)

        index2 = index[::-1]
        assert index2.slice_locs(12, 12) == (1, 3)
        assert index2.slice_locs(13, 11) == (1, 3)

    def test_slice_locs_na(self):
        index = Index([np.nan, 1, 2])
        assert index.slice_locs(1) == (1, 3)
        assert index.slice_locs(np.nan) == (0, 3)

        index = Index([0, np.nan, np.nan, 1, 2])
        assert index.slice_locs(np.nan) == (1, 5)

    def test_slice_locs_na_raises(self):
        index = Index([np.nan, 1, 2])
        # 1.5 is not a label of this index.  Match on the missing label rather
        # than on "", which matches any message and so checks nothing.
        msg = r"^1\.5$"
        with pytest.raises(KeyError, match=msg):
            index.slice_locs(start=1.5)

        with pytest.raises(KeyError, match=msg):
            index.slice_locs(end=1.5)
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
class TestGetSliceBounds:
    """``Index.get_slice_bound`` for labels inside and outside the index."""

    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_within(self, side, expected):
        # label 4 is present in 0..5
        six = Index(range(6))
        assert six.get_slice_bound(4, side=side) == expected

    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize("bound, expected", [(-1, 0), (10, 6)])
    def test_get_slice_bounds_outside(self, side, expected, bound):
        # labels below / above every value in 0..5
        six = Index(range(6))
        assert six.get_slice_bound(bound, side=side) == expected
|