Z-Image-Special-Edtion / python_env /lib /site-packages /pandas /tests /generic /test_duplicate_labels.py
| """Tests dealing with the NDFrame.flags.allows_duplicate_labels flag.""" | |
| import operator | |
| import numpy as np | |
| import pytest | |
| import pandas as pd | |
| import pandas._testing as tm | |
# Reusable xfail marker carrying the reason "Not implemented.".
# NOTE(review): no use of this marker is visible in this section; decorators
# may have been lost from this copy of the file -- confirm against upstream.
not_implemented = pytest.mark.xfail(reason="Not implemented.")
| # ---------------------------------------------------------------------------- | |
| # Preservation | |
class TestPreserves:
    """Check that ``flags.allows_duplicate_labels=False`` survives operations.

    NOTE(review): several tests take arguments (``cls``, ``data``, ``func``,
    ``other``, ``frame``, ``objs``, ``kwargs``, ``left``, ``right``,
    ``expected``) whose providers are not visible in this section --
    presumably parametrization or fixtures; confirm before running.
    """

    def test_construction_ok(self, cls, data):
        """New objects allow duplicates; ``set_flags`` can turn that off."""
        result = cls(data)
        assert result.flags.allows_duplicate_labels is True

        result = cls(data).set_flags(allows_duplicate_labels=False)
        assert result.flags.allows_duplicate_labels is False

    def test_preserved_series(self, func):
        """The result of ``func`` on a flagged Series must stay flagged."""
        s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
        assert func(s).flags.allows_duplicate_labels is False

    # TODO: frame
    def test_align(self, other):
        """Both outputs of ``Series.align`` must carry the disallowing flag."""
        s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
        a, b = s.align(other)
        assert a.flags.allows_duplicate_labels is False
        assert b.flags.allows_duplicate_labels is False

    def test_preserved_frame(self):
        """Row and column selection through ``.loc`` keeps the flag."""
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        assert df.loc[["a"]].flags.allows_duplicate_labels is False
        assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False

    def test_to_frame(self):
        """``Series.to_frame`` keeps the flag."""
        ser = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False)
        assert ser.to_frame().flags.allows_duplicate_labels is False

    def test_binops(self, func, other, frame):
        """Calling the method named ``func`` with ``other`` keeps the flag."""
        obj = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        if frame:
            obj = obj.to_frame()
        # A Series operand is promoted as well so that both sides are frames.
        if isinstance(other, pd.Series) and frame:
            other = other.to_frame()
        op = operator.methodcaller(func, other)
        assert obj.flags.allows_duplicate_labels is False
        assert op(obj).flags.allows_duplicate_labels is False

    def test_preserve_getitem(self):
        """``[]`` and ``.loc`` selections of a flagged frame stay flagged."""
        df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)
        assert df[["A"]].flags.allows_duplicate_labels is False
        assert df["A"].flags.allows_duplicate_labels is False
        assert df.loc[0].flags.allows_duplicate_labels is False
        assert df.loc[[0]].flags.allows_duplicate_labels is False
        assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False

    def test_ndframe_getitem_caching_issue(
        self, request, using_copy_on_write, warn_copy_on_write
    ):
        """Changing the frame's flag must be reflected by a later ``df["A"]``."""
        if not (using_copy_on_write or warn_copy_on_write):
            request.applymarker(pytest.mark.xfail(reason="Unclear behavior."))
        # NDFrame.__getitem__ will cache the first df['A']. May need to
        # invalidate that cache? Update the cached entries?
        df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False)
        assert df["A"].flags.allows_duplicate_labels is False
        df.flags.allows_duplicate_labels = True
        assert df["A"].flags.allows_duplicate_labels is True

    def test_concat(self, objs, kwargs):
        """Concatenating flagged inputs yields a flagged result."""
        objs = [x.set_flags(allows_duplicate_labels=False) for x in objs]
        result = pd.concat(objs, **kwargs)
        assert result.flags.allows_duplicate_labels is False

    def test_merge(self, left, right, expected):
        """An index-on-index merge yields the ``expected`` flag value."""
        result = pd.merge(left, right, left_index=True, right_index=True)
        assert result.flags.allows_duplicate_labels is expected

    def test_groupby(self):
        """A groupby aggregation of a flagged frame stays flagged."""
        # XXX: This is under tested
        # TODO:
        #  - apply
        #  - transform
        #  - Should passing a grouper that disallows duplicates propagate?
        df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False)
        result = df.groupby([0, 0, 1]).agg("count")
        assert result.flags.allows_duplicate_labels is False

    def test_window(self, frame):
        """Rolling, ewm and expanding means of a flagged object stay flagged."""
        # The flag is set through set_flags, like every other test in this
        # class: the Series constructor takes no ``allows_duplicate_labels``
        # keyword, so passing it there fails before any window op runs.
        # NOTE(review): a module-level xfail marker exists but no decorator is
        # visible on this test -- confirm whether it is expected to xfail.
        obj = pd.Series(
            1,
            index=pd.date_range("2000", periods=12),
            name="A",
        ).set_flags(allows_duplicate_labels=False)
        if frame:
            obj = obj.to_frame()
        assert obj.rolling(3).mean().flags.allows_duplicate_labels is False
        assert obj.ewm(3).mean().flags.allows_duplicate_labels is False
        assert obj.expanding(3).mean().flags.allows_duplicate_labels is False
| # ---------------------------------------------------------------------------- | |
| # Raises | |
class TestRaises:
    """Check that duplicate labels on a flagged object raise.

    NOTE(review): ``cls``, ``axes``, ``data``, ``getter``, ``target``,
    ``objs`` and ``kwargs`` are supplied from outside this section --
    presumably parametrization or fixtures; confirm before running.
    """

    def test_set_flags_with_duplicates(self, cls, axes):
        """Construction succeeds, but disallowing duplicates afterwards raises."""
        built = cls(**axes)
        assert built.flags.allows_duplicate_labels is True

        pattern = "Index has duplicates."
        with pytest.raises(pd.errors.DuplicateLabelError, match=pattern):
            cls(**axes).set_flags(allows_duplicate_labels=False)

    def test_setting_allows_duplicate_labels_raises(self, data):
        """A rejected flag assignment leaves the flag at ``True``."""
        pattern = "Index has duplicates."
        with pytest.raises(pd.errors.DuplicateLabelError, match=pattern):
            data.flags.allows_duplicate_labels = False

        assert data.flags.allows_duplicate_labels is True

    def test_series_raises(self):
        """Concatenating two Series sharing labels raises when one is flagged."""
        unflagged = pd.Series(0, index=["a", "b"])
        flagged = pd.Series([0, 1], index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        pattern = "Index has duplicates."
        with pytest.raises(pd.errors.DuplicateLabelError, match=pattern):
            pd.concat([unflagged, flagged])

    def test_getitem_raises(self, getter, target):
        """Applying ``getter`` to the frame or one of its indexers raises."""
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        # df, df.loc, or df.iloc
        indexable = getattr(df, target) if target else df

        pattern = "Index has duplicates."
        with pytest.raises(pd.errors.DuplicateLabelError, match=pattern):
            getter(indexable)

    def test_concat_raises(self, objs, kwargs):
        """Concatenating the flagged inputs raises."""
        flagged = [obj.set_flags(allows_duplicate_labels=False) for obj in objs]
        pattern = "Index has duplicates."
        with pytest.raises(pd.errors.DuplicateLabelError, match=pattern):
            pd.concat(flagged, **kwargs)

    def test_merge_raises(self):
        """Merging a flagged frame with one that has a repeated label raises."""
        left = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags(
            allows_duplicate_labels=False
        )
        # The right-hand index repeats "b".
        right = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"])
        pattern = "Index has duplicates."
        with pytest.raises(pd.errors.DuplicateLabelError, match=pattern):
            pd.merge(left, right, left_index=True, right_index=True)
def test_raises_basic(idx):
    """Disallowing duplicates raises for a Series and for both frame axes.

    NOTE(review): ``idx`` is supplied from outside this section; presumably
    it holds duplicate labels -- confirm against its provider.
    """
    builders = (
        lambda: pd.Series(1, index=idx),
        lambda: pd.DataFrame({"A": [1, 1]}, index=idx),
        lambda: pd.DataFrame([[1, 2]], columns=idx),
    )
    pattern = "Index has duplicates."
    for build in builders:
        # Construction stays inside the context manager, as in the
        # hand-written form of this test.
        with pytest.raises(pd.errors.DuplicateLabelError, match=pattern):
            build().set_flags(allows_duplicate_labels=False)
def test_format_duplicate_labels_message():
    """``Index._format_duplicate_message`` lists positions per repeated label."""
    labels = pd.Index(["a", "b", "a", "b", "c"])
    # "a" sits at positions 0 and 2, "b" at 1 and 3; "c" is unique and absent.
    expected = pd.DataFrame(
        {"positions": [[0, 2], [1, 3]]},
        index=pd.Index(["a", "b"], name="label"),
    )
    tm.assert_frame_equal(labels._format_duplicate_message(), expected)
def test_format_duplicate_labels_message_multi():
    """``MultiIndex._format_duplicate_message`` lists positions per repeated tuple."""
    inner = ["a", "b", "a", "b", "c"]
    labels = pd.MultiIndex.from_product([["A"], inner])
    expected_index = pd.MultiIndex.from_product([["A"], ["a", "b"]])
    expected = pd.DataFrame({"positions": [[0, 2], [1, 3]]}, index=expected_index)
    tm.assert_frame_equal(labels._format_duplicate_message(), expected)
def test_dataframe_insert_raises():
    """``insert(..., allow_duplicates=True)`` on a flagged frame raises ValueError."""
    flagged = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)
    with pytest.raises(ValueError, match="Cannot specify"):
        flagged.insert(0, "A", [3, 4], allow_duplicates=True)
def test_inplace_raises(method, frame_only):
    """``method`` raises ValueError on a flagged frame, and on a Series too.

    The Series check is skipped when ``frame_only`` is truthy.
    NOTE(review): ``method`` and ``frame_only`` are supplied from outside this
    section; confirm against their provider.
    """
    frame = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags(
        allows_duplicate_labels=False
    )
    column = frame["A"]
    column.flags.allows_duplicate_labels = False

    targets = [frame] if frame_only else [frame, column]
    for obj in targets:
        with pytest.raises(ValueError, match="Cannot specify"):
            method(obj)
def test_pickle():
    """Flagged Series and DataFrame objects compare equal after a pickle round trip."""
    cases = (
        (pd.Series([1, 2]), tm.assert_series_equal),
        (pd.DataFrame({"A": []}), tm.assert_frame_equal),
    )
    for base, assert_equal in cases:
        original = base.set_flags(allows_duplicate_labels=False)
        restored = tm.round_trip_pickle(original)
        assert_equal(original, restored)