| import numpy as np |
| import pytest |
|
|
| from pandas.errors import SettingWithCopyWarning |
|
|
| from pandas.core.dtypes.common import is_float_dtype |
|
|
| import pandas as pd |
| from pandas import ( |
| DataFrame, |
| Series, |
| ) |
| import pandas._testing as tm |
| from pandas.tests.copy_view.util import get_array |
|
|
|
|
@pytest.fixture(params=["numpy", "nullable"])
def backend(request):
    """
    Provide ``(name, make_dataframe, make_series)`` for each dtype backend.

    For ``"numpy"`` the two constructors forward straight to ``DataFrame`` and
    ``Series``. For ``"nullable"`` the constructed object is additionally run
    through ``convert_dtypes()`` and copied before it is returned.
    """
    kind = request.param

    if kind == "numpy":

        def make_dataframe(*args, **kwargs):
            return DataFrame(*args, **kwargs)

        def make_series(*args, **kwargs):
            return Series(*args, **kwargs)

    elif kind == "nullable":

        def make_dataframe(*args, **kwargs):
            plain = DataFrame(*args, **kwargs)
            converted = plain.convert_dtypes()
            # Columns that were float before ``convert_dtypes`` but are not
            # float afterwards get cast to "Float64".
            demoted = [
                name
                for name in plain.columns
                if is_float_dtype(plain[name].dtype)
                and not is_float_dtype(converted[name].dtype)
            ]
            for name in demoted:
                converted[name] = converted[name].astype("Float64")
            # Hand back a copy of the converted frame.
            return converted.copy()

        def make_series(*args, **kwargs):
            return Series(*args, **kwargs).convert_dtypes().copy()

    return kind, make_dataframe, make_series
|
|
|
|
| |
| |
|
|
|
|
def test_subset_column_selection(backend, using_copy_on_write):
    """
    Select columns with a list of labels, then write into the subset.

    The write must leave the parent frame untouched, and after the write the
    subset must not share memory with the parent for column "a".
    """
    _, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = parent.copy()

    picked = parent[["a", "c"]]

    shared_before = np.shares_memory(get_array(picked, "a"), get_array(parent, "a"))
    # Before the write, memory is shared exactly when using_copy_on_write is set.
    if using_copy_on_write:
        assert shared_before
    else:
        assert not shared_before
    picked.iloc[0, 0] = 0

    assert not np.shares_memory(get_array(picked, "a"), get_array(parent, "a"))

    tm.assert_frame_equal(picked, make_df({"a": [0, 2, 3], "c": [0.1, 0.2, 0.3]}))
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
def test_subset_column_selection_modify_parent(backend, using_copy_on_write):
    """
    Select columns with a list of labels, then write into the parent.

    The subset must keep its original values. With ``using_copy_on_write`` the
    untouched column "c" is still expected to share memory afterwards.
    """
    _, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})

    picked = parent[["a", "c"]]

    def _shares(column):
        return np.shares_memory(get_array(picked, column), get_array(parent, column))

    if using_copy_on_write:
        assert _shares("a")

    parent.iloc[0, 0] = 0

    assert not _shares("a")
    if using_copy_on_write:
        # Only the written column is expected to stop sharing memory.
        assert _shares("c")

    tm.assert_frame_equal(picked, make_df({"a": [1, 2, 3], "c": [0.1, 0.2, 0.3]}))
|
|
|
|
def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write):
    """
    Take a row slice with ``df[1:3]`` and write into it.

    The slice initially shares memory with the parent. With
    ``using_copy_on_write`` the parent stays unchanged; otherwise the write is
    expected to show up in the parent as well.
    """
    _, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = parent.copy()

    rows = parent[1:3]
    rows._mgr._verify_integrity()

    assert np.shares_memory(get_array(rows, "a"), get_array(parent, "a"))

    if not using_copy_on_write:
        with tm.assert_cow_warning(warn_copy_on_write):
            rows.iloc[0, 0] = 0
    else:
        rows.iloc[0, 0] = 0
        assert not np.shares_memory(get_array(rows, "a"), get_array(parent, "a"))

    rows._mgr._verify_integrity()

    expected = make_df(
        {"a": [0, 3], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)
    )
    tm.assert_frame_equal(rows, expected)

    if not using_copy_on_write:
        # The write went through to the parent, so mirror it in the snapshot.
        snapshot.iloc[1, 0] = 0
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_column_slice(
    backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
):
    """
    Take a column slice with ``df.iloc[:, 1:]`` and write into it.

    Which warning is expected, and whether the parent is mutated, depends on
    ``single_block`` (int64 data, numpy backend, no array manager) and on the
    copy-on-write fixtures.
    """
    dtype_backend, make_df, _ = backend
    single_block = (
        not using_array_manager and dtype == "int64" and dtype_backend == "numpy"
    )
    parent = make_df(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
    )
    snapshot = parent.copy()

    cols = parent.iloc[:, 1:]
    cols._mgr._verify_integrity()

    if using_copy_on_write:
        assert np.shares_memory(get_array(cols, "b"), get_array(parent, "b"))

        cols.iloc[0, 0] = 0
        assert not np.shares_memory(get_array(cols, "b"), get_array(parent, "b"))
    elif warn_copy_on_write:
        with tm.assert_cow_warning(single_block):
            cols.iloc[0, 0] = 0
    else:
        expected_warning = SettingWithCopyWarning if single_block else None
        chained = pd.option_context("chained_assignment", "warn")
        with chained, tm.assert_produces_warning(expected_warning):
            cols.iloc[0, 0] = 0

    expected = make_df({"b": [0, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)})
    tm.assert_frame_equal(cols, expected)

    parent_mutated = not using_copy_on_write and (using_array_manager or single_block)
    if parent_mutated:
        snapshot.iloc[0, 1] = 0
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
@pytest.mark.parametrize(
    "row_indexer",
    [slice(1, 2), np.array([False, True, True]), np.array([1, 2])],
    ids=["slice", "mask", "array"],
)
@pytest.mark.parametrize(
    "column_indexer",
    [slice("b", "c"), np.array([False, True, True]), ["b", "c"]],
    ids=["slice", "mask", "array"],
)
def test_subset_loc_rows_columns(
    backend,
    dtype,
    row_indexer,
    column_indexer,
    using_array_manager,
    using_copy_on_write,
    warn_copy_on_write,
):
    """
    Select rows and columns at once with ``.loc`` and write into the result.

    The parent is expected to change only when both indexers are slices and
    either ``using_array_manager`` is set or the frame is int64 on the numpy
    backend without ``using_copy_on_write``.
    """
    dtype_backend, make_df, _ = backend
    parent = make_df(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
    )
    snapshot = parent.copy()

    selection = parent.loc[row_indexer, column_indexer]

    both_slices = isinstance(row_indexer, slice) and isinstance(
        column_indexer, slice
    )
    numpy_int_frame = dtype == "int64" and dtype_backend == "numpy"
    mutate_parent = both_slices and (
        using_array_manager or (numpy_int_frame and not using_copy_on_write)
    )

    with tm.assert_cow_warning(warn_copy_on_write and mutate_parent):
        selection.iloc[0, 0] = 0

    tm.assert_frame_equal(
        selection,
        make_df(
            {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3)
        ),
    )
    if mutate_parent:
        snapshot.iloc[1, 1] = 0
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
@pytest.mark.parametrize(
    "row_indexer",
    [slice(1, 3), np.array([False, True, True]), np.array([1, 2])],
    ids=["slice", "mask", "array"],
)
@pytest.mark.parametrize(
    "column_indexer",
    [slice(1, 3), np.array([False, True, True]), [1, 2]],
    ids=["slice", "mask", "array"],
)
def test_subset_iloc_rows_columns(
    backend,
    dtype,
    row_indexer,
    column_indexer,
    using_array_manager,
    using_copy_on_write,
    warn_copy_on_write,
):
    """
    Select rows and columns at once with ``.iloc`` and write into the result.

    The parent is expected to change only when both indexers are slices and
    either ``using_array_manager`` is set or the frame is int64 on the numpy
    backend without ``using_copy_on_write``.
    """
    dtype_backend, make_df, _ = backend
    parent = make_df(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
    )
    snapshot = parent.copy()

    selection = parent.iloc[row_indexer, column_indexer]

    both_slices = isinstance(row_indexer, slice) and isinstance(
        column_indexer, slice
    )
    numpy_int_frame = dtype == "int64" and dtype_backend == "numpy"
    mutate_parent = both_slices and (
        using_array_manager or (numpy_int_frame and not using_copy_on_write)
    )

    with tm.assert_cow_warning(warn_copy_on_write and mutate_parent):
        selection.iloc[0, 0] = 0

    tm.assert_frame_equal(
        selection,
        make_df(
            {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3)
        ),
    )
    if mutate_parent:
        snapshot.iloc[1, 1] = 0
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "indexer",
    [slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
    ids=["slice", "mask", "array"],
)
def test_subset_set_with_row_indexer(
    backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write
):
    """
    Write a scalar into a row-sliced subset through a row indexer.

    The combination of plain setitem with an integer array is skipped. With
    ``using_copy_on_write`` the parent stays unchanged; otherwise rows 1 and 2
    of the parent are expected to be zeroed too.
    """
    _, make_df, _ = backend
    parent = make_df(
        {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]}
    )
    snapshot = parent.copy()
    rows = parent[1:4]

    integer_array = isinstance(indexer, np.ndarray) and indexer.dtype == "int"
    if indexer_si is tm.setitem and integer_array:
        pytest.skip("setitem with labels selects on columns")

    if using_copy_on_write:
        indexer_si(rows)[indexer] = 0
    elif warn_copy_on_write:
        with tm.assert_cow_warning():
            indexer_si(rows)[indexer] = 0
    else:
        # Only plain setitem is expected to emit SettingWithCopyWarning here.
        expected_warning = SettingWithCopyWarning if indexer_si is tm.setitem else None
        chained = pd.option_context("chained_assignment", "warn")
        with chained, tm.assert_produces_warning(expected_warning):
            indexer_si(rows)[indexer] = 0

    expected = make_df(
        {"a": [0, 0, 4], "b": [0, 0, 7], "c": [0.0, 0.0, 0.4]}, index=range(1, 4)
    )
    tm.assert_frame_equal(rows, expected)

    if not using_copy_on_write:
        # The write went through to the parent, so mirror it in the snapshot.
        snapshot[1:3] = 0
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write):
    """
    Write a scalar into a row-sliced subset through a boolean frame mask.

    With ``using_copy_on_write`` the parent stays unchanged; otherwise the
    masked positions are expected to be zeroed in the parent as well.
    """
    _, make_df, _ = backend
    parent = make_df(
        {"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]}
    )
    snapshot = parent.copy()
    rows = parent[1:4]

    above_three = rows > 3

    if using_copy_on_write:
        rows[above_three] = 0
    elif warn_copy_on_write:
        with tm.assert_cow_warning():
            rows[above_three] = 0
    else:
        chained = pd.option_context("chained_assignment", "warn")
        with chained, tm.assert_produces_warning(SettingWithCopyWarning):
            rows[above_three] = 0

    expected = make_df(
        {"a": [2, 3, 0], "b": [0, 0, 0], "c": [0.20, 0.3, 0.4]}, index=range(1, 4)
    )
    tm.assert_frame_equal(rows, expected)

    if not using_copy_on_write:
        # The write went through to the parent, so mirror it in the snapshot.
        snapshot.loc[3, "a"] = 0
        snapshot.loc[1:3, "b"] = 0
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write):
    """
    Overwrite a whole column of a row-sliced subset with ``subset["a"] = arr``.

    The parent frame is expected to stay unchanged in every mode.
    """
    dtype_backend, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = parent.copy()
    rows = parent[1:3]

    # The replacement array uses the same flavour of dtype as the backend.
    replacement = (
        np.array([10, 11], dtype="int64")
        if dtype_backend == "numpy"
        else pd.array([10, 11], dtype="Int64")
    )

    if not (using_copy_on_write or warn_copy_on_write):
        chained = pd.option_context("chained_assignment", "warn")
        with chained, tm.assert_produces_warning(SettingWithCopyWarning):
            rows["a"] = replacement
    else:
        rows["a"] = replacement

    rows._mgr._verify_integrity()
    tm.assert_frame_equal(
        rows,
        make_df({"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)),
    )
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_set_column_with_loc(
    backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
):
    """
    Overwrite a column of a row-sliced subset with ``subset.loc[:, "a"] = arr``.

    With ``using_copy_on_write`` the parent stays unchanged; otherwise the new
    values are expected to appear in the parent too.
    """

    def _new_values():
        # A fresh array for every assignment, as separate literals would give.
        return np.array([10, 11], dtype="int64")

    _, make_df, _ = backend
    parent = make_df(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
    )
    snapshot = parent.copy()
    rows = parent[1:3]

    if using_copy_on_write:
        rows.loc[:, "a"] = _new_values()
    elif warn_copy_on_write:
        with tm.assert_cow_warning():
            rows.loc[:, "a"] = _new_values()
    else:
        chained = pd.option_context("chained_assignment", "warn")
        no_warning = tm.assert_produces_warning(
            None,
            raise_on_extra_warnings=not using_array_manager,
        )
        with chained, no_warning:
            rows.loc[:, "a"] = _new_values()

    rows._mgr._verify_integrity()
    expected = make_df(
        {"a": [10, 11], "b": [5, 6], "c": np.array([8, 9], dtype=dtype)},
        index=range(1, 3),
    )
    tm.assert_frame_equal(rows, expected)

    if not using_copy_on_write:
        # The write went through to the parent, so mirror it in the snapshot.
        snapshot.loc[1:3, "a"] = _new_values()
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
def test_subset_set_column_with_loc2(
    backend, using_copy_on_write, warn_copy_on_write, using_array_manager
):
    """
    Set a scalar on the only column of a row-sliced subset via ``.loc``.

    The frame here has a single column "a". With ``using_copy_on_write`` the
    parent stays unchanged; otherwise the parent is expected to change too.
    """
    _, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3]})
    snapshot = parent.copy()
    rows = parent[1:3]

    if using_copy_on_write:
        rows.loc[:, "a"] = 0
    elif warn_copy_on_write:
        with tm.assert_cow_warning():
            rows.loc[:, "a"] = 0
    else:
        chained = pd.option_context("chained_assignment", "warn")
        no_warning = tm.assert_produces_warning(
            None,
            raise_on_extra_warnings=not using_array_manager,
        )
        with chained, no_warning:
            rows.loc[:, "a"] = 0

    rows._mgr._verify_integrity()
    tm.assert_frame_equal(rows, make_df({"a": [0, 0]}, index=range(1, 3)))

    if not using_copy_on_write:
        # The write went through to the parent, so mirror it in the snapshot.
        snapshot.loc[1:3, "a"] = 0
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dtype):
    """
    Overwrite several columns of a row-sliced subset with a scalar.

    The parent frame is expected to stay unchanged in every mode.
    """
    dtype_backend, make_df, _ = backend
    parent = make_df(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
    )
    snapshot = parent.copy()
    rows = parent[1:3]

    if not (using_copy_on_write or warn_copy_on_write):
        chained = pd.option_context("chained_assignment", "warn")
        with chained, tm.assert_produces_warning(SettingWithCopyWarning):
            rows[["a", "c"]] = 0
    else:
        rows[["a", "c"]] = 0

    rows._mgr._verify_integrity()
    if using_copy_on_write:
        # The overwritten columns (positions 0 and 2) must hold no references.
        assert all(rows._mgr._has_no_reference(pos) for pos in [0, 2])

    expected = make_df({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3))
    if dtype_backend == "nullable":
        # The overwritten columns are expected as plain int64 on this backend.
        for name in ("a", "c"):
            expected[name] = expected[name].astype("int64")

    tm.assert_frame_equal(rows, expected)
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "indexer",
    [slice("a", "b"), np.array([True, True, False]), ["a", "b"]],
    ids=["slice", "mask", "array"],
)
def test_subset_set_with_column_indexer(
    backend, indexer, using_copy_on_write, warn_copy_on_write
):
    """
    Set a scalar on several columns of a row-sliced subset via ``.loc``.

    With ``using_copy_on_write`` the parent stays unchanged; otherwise columns
    "a" and "b" of the parent are expected to be zeroed for the sliced rows.
    """
    _, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
    snapshot = parent.copy()
    rows = parent[1:3]

    if using_copy_on_write:
        rows.loc[:, indexer] = 0
    elif warn_copy_on_write:
        with tm.assert_cow_warning():
            rows.loc[:, indexer] = 0
    else:
        # No warning assertion is made in this branch.
        with pd.option_context("chained_assignment", "warn"):
            rows.loc[:, indexer] = 0

    rows._mgr._verify_integrity()
    tm.assert_frame_equal(
        rows,
        make_df({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3)),
    )

    if not using_copy_on_write:
        # The write went through to the parent, so mirror it in the snapshot.
        snapshot.loc[1:2, ["a", "b"]] = 0
    tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "method",
    [
        lambda df: df[["a", "b"]][0:2],
        lambda df: df[0:2][["a", "b"]],
        lambda df: df[["a", "b"]].iloc[0:2],
        lambda df: df[["a", "b"]].loc[0:1],
        lambda df: df[0:2].iloc[:, 0:2],
        lambda df: df[0:2].loc[:, "a":"b"],
    ],
    ids=[
        "row-getitem-slice",
        "column-getitem",
        "row-iloc-slice",
        "row-loc-slice",
        "column-iloc-slice",
        "column-loc-slice",
    ],
)
@pytest.mark.parametrize(
    "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_chained_getitem(
    request,
    backend,
    method,
    dtype,
    using_copy_on_write,
    using_array_manager,
    warn_copy_on_write,
):
    """
    Build a subset through two chained indexing steps and write on both sides.

    Whether the subset is expected to behave as a view of the parent is
    derived from the parametrization id of the running test.
    """
    _, make_df, _ = backend
    parent = make_df(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
    )
    snapshot = parent.copy()

    case_id = request.node.callspec.id
    if using_array_manager:
        # Any column-slice case counts as a view, whatever the other params are.
        subset_is_view = case_id.endswith(
            ("column-iloc-slice", "column-loc-slice")
        )
    else:
        subset_is_view = case_id in (
            "numpy-single-block-column-iloc-slice",
            "numpy-single-block-column-loc-slice",
        )

    isolated = using_copy_on_write or (not subset_is_view)

    # Write into the subset and inspect the parent.
    subset = method(parent)

    with tm.assert_cow_warning(warn_copy_on_write and subset_is_view):
        subset.iloc[0, 0] = 0
    if isolated:
        tm.assert_frame_equal(parent, snapshot)
    else:
        assert parent.iloc[0, 0] == 0

    # Write into the parent and inspect a freshly built subset.
    subset = method(parent)
    with tm.assert_cow_warning(warn_copy_on_write and subset_is_view):
        parent.iloc[0, 0] = 0
    expected = make_df({"a": [1, 2], "b": [4, 5]})
    if isolated:
        tm.assert_frame_equal(subset, expected)
    else:
        assert subset.iloc[0, 0] == 0
|
|
|
|
@pytest.mark.parametrize(
    "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_chained_getitem_column(
    backend, dtype, using_copy_on_write, warn_copy_on_write
):
    """
    Reach a Series slice through ``df[:]["a"][0:2]`` and write on both sides.

    With ``using_copy_on_write`` neither side sees the other's write;
    otherwise the write is expected to be visible on the other side.
    """
    _, make_df, make_ser = backend
    parent = make_df(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
    )
    snapshot = parent.copy()

    # Write into the subset and inspect the parent.
    piece = parent[:]["a"][0:2]
    parent._clear_item_cache()
    with tm.assert_cow_warning(warn_copy_on_write):
        piece.iloc[0] = 0
    if not using_copy_on_write:
        assert parent.iloc[0, 0] == 0
    else:
        tm.assert_frame_equal(parent, snapshot)

    # Write into the parent and inspect a freshly built subset.
    piece = parent[:]["a"][0:2]
    parent._clear_item_cache()
    with tm.assert_cow_warning(warn_copy_on_write):
        parent.iloc[0, 0] = 0
    expected = make_ser([1, 2], name="a")
    if not using_copy_on_write:
        assert piece.iloc[0] == 0
    else:
        tm.assert_series_equal(piece, expected)
|
|
|
|
@pytest.mark.parametrize(
    "method",
    [
        lambda s: s["a":"c"]["a":"b"],
        lambda s: s.iloc[0:3].iloc[0:2],
        lambda s: s.loc["a":"c"].loc["a":"b"],
        lambda s: s.loc["a":"c"]
        .iloc[0:3]
        .iloc[0:2]
        .loc["a":"b"]
        .iloc[0:1],
    ],
    ids=["getitem", "iloc", "loc", "long-chain"],
)
def test_subset_chained_getitem_series(
    backend, method, using_copy_on_write, warn_copy_on_write
):
    """
    Slice a Series through chained indexing steps and write on both sides.

    The first half uses the parametrized ``method``; the second half always
    builds the subset with ``s.iloc[0:3].iloc[0:2]``.
    """
    _, _, make_ser = backend
    parent = make_ser([1, 2, 3], index=["a", "b", "c"])
    snapshot = parent.copy()

    # Write into the subset and inspect the parent.
    piece = method(parent)
    with tm.assert_cow_warning(warn_copy_on_write):
        piece.iloc[0] = 0
    if not using_copy_on_write:
        assert parent.iloc[0] == 0
    else:
        tm.assert_series_equal(parent, snapshot)

    # Write into the parent and inspect the subset.
    piece = parent.iloc[0:3].iloc[0:2]
    with tm.assert_cow_warning(warn_copy_on_write):
        parent.iloc[0] = 0
    expected = make_ser([1, 2], index=["a", "b"])
    if not using_copy_on_write:
        assert piece.iloc[0] == 0
    else:
        tm.assert_series_equal(piece, expected)
|
|
|
|
def test_subset_chained_single_block_row(
    using_copy_on_write, using_array_manager, warn_copy_on_write
):
    """
    Reach part of a row through ``df[:].iloc[0].iloc[0:2]`` and write on both sides.

    With ``using_copy_on_write`` or ``using_array_manager`` neither side sees
    the other's write; otherwise the write is expected to be visible.
    """
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    snapshot = parent.copy()
    isolated = using_copy_on_write or using_array_manager

    # Write into the subset and inspect the parent.
    piece = parent[:].iloc[0].iloc[0:2]
    with tm.assert_cow_warning(warn_copy_on_write):
        piece.iloc[0] = 0
    if isolated:
        tm.assert_frame_equal(parent, snapshot)
    else:
        assert parent.iloc[0, 0] == 0

    # Write into the parent and inspect a freshly built subset.
    piece = parent[:].iloc[0].iloc[0:2]
    with tm.assert_cow_warning(warn_copy_on_write):
        parent.iloc[0, 0] = 0
    expected = Series([1, 4], index=["a", "b"], name=0)
    if isolated:
        tm.assert_series_equal(piece, expected)
    else:
        assert piece.iloc[0] == 0
|
|
|
|
@pytest.mark.parametrize(
    "method",
    [
        lambda df: df[:],
        lambda df: df.loc[:, :],
        lambda df: df.loc[:],
        lambda df: df.iloc[:, :],
        lambda df: df.iloc[:],
    ],
    ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"],
)
def test_null_slice(backend, method, using_copy_on_write, warn_copy_on_write):
    """
    Index a frame with a full slice and write into the result.

    The result must be a new object. With ``using_copy_on_write`` the parent
    stays unchanged; otherwise the write is expected to reach the parent.
    """
    _, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    snapshot = parent.copy()

    sliced = method(parent)

    # A full slice must not hand back the very same object.
    assert sliced is not parent

    with tm.assert_cow_warning(warn_copy_on_write):
        sliced.iloc[0, 0] = 0
    if not using_copy_on_write:
        assert parent.iloc[0, 0] == 0
    else:
        tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "method",
    [
        lambda s: s[:],
        lambda s: s.loc[:],
        lambda s: s.iloc[:],
    ],
    ids=["getitem", "loc", "iloc"],
)
def test_null_slice_series(backend, method, using_copy_on_write, warn_copy_on_write):
    """
    Index a Series with a full slice and write into the result.

    The result must be a new object. With ``using_copy_on_write`` the parent
    stays unchanged; otherwise the write is expected to reach the parent.
    """
    _, _, make_ser = backend
    parent = make_ser([1, 2, 3], index=["a", "b", "c"])
    snapshot = parent.copy()

    sliced = method(parent)

    # A full slice must not hand back the very same object.
    assert sliced is not parent

    with tm.assert_cow_warning(warn_copy_on_write):
        sliced.iloc[0] = 0
    if not using_copy_on_write:
        assert parent.iloc[0] == 0
    else:
        tm.assert_series_equal(parent, snapshot)
|
|
|
|
| |
|
|
|
|
| |
| |
|
|
|
|
def test_series_getitem_slice(backend, using_copy_on_write, warn_copy_on_write):
    """
    Slice a Series with ``s[:]`` and write into the slice.

    The slice initially shares memory with the parent. With
    ``using_copy_on_write`` the parent stays unchanged; otherwise the write is
    expected to reach the parent.
    """
    _, _, make_ser = backend
    parent = make_ser([1, 2, 3], index=["a", "b", "c"])
    snapshot = parent.copy()

    sliced = parent[:]
    assert np.shares_memory(get_array(sliced), get_array(parent))

    with tm.assert_cow_warning(warn_copy_on_write):
        sliced.iloc[0] = 0

    if using_copy_on_write:
        assert not np.shares_memory(get_array(sliced), get_array(parent))

    tm.assert_series_equal(sliced, make_ser([0, 2, 3], index=["a", "b", "c"]))

    if not using_copy_on_write:
        assert parent.iloc[0] == 0
    else:
        tm.assert_series_equal(parent, snapshot)
|
|
|
|
def test_series_getitem_ellipsis(using_copy_on_write, warn_copy_on_write):
    """
    Index a Series with ``s[...]`` and write into the result.

    The result initially shares memory with the parent. With
    ``using_copy_on_write`` the parent stays unchanged; otherwise the write is
    expected to reach the parent.
    """
    parent = Series([1, 2, 3])
    snapshot = parent.copy()

    sliced = parent[...]
    assert np.shares_memory(get_array(sliced), get_array(parent))

    with tm.assert_cow_warning(warn_copy_on_write):
        sliced.iloc[0] = 0

    if using_copy_on_write:
        assert not np.shares_memory(get_array(sliced), get_array(parent))

    tm.assert_series_equal(sliced, Series([0, 2, 3]))

    if not using_copy_on_write:
        assert parent.iloc[0] == 0
    else:
        tm.assert_series_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "indexer",
    [slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
    ids=["slice", "mask", "array"],
)
def test_series_subset_set_with_indexer(
    backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write
):
    """
    Write a scalar into a full-slice Series subset through an indexer.

    Plain setitem with an integer array is additionally expected to emit a
    FutureWarning. With ``using_copy_on_write`` the parent stays unchanged;
    otherwise the parent is expected to match the modified subset.
    """
    _, _, make_ser = backend
    parent = make_ser([1, 2, 3], index=["a", "b", "c"])
    snapshot = parent.copy()
    sliced = parent[:]

    msg = "Series.__setitem__ treating keys as positions is deprecated"
    positional_setitem = (
        indexer_si is tm.setitem
        and isinstance(indexer, np.ndarray)
        and indexer.dtype.kind == "i"
    )
    warn = FutureWarning if positional_setitem else None

    if not warn_copy_on_write:
        with tm.assert_produces_warning(warn, match=msg):
            indexer_si(sliced)[indexer] = 0
    else:
        with tm.assert_cow_warning(raise_on_extra_warnings=warn is not None):
            indexer_si(sliced)[indexer] = 0

    expected = make_ser([0, 0, 3], index=["a", "b", "c"])
    tm.assert_series_equal(sliced, expected)

    parent_expected = snapshot if using_copy_on_write else expected
    tm.assert_series_equal(parent, parent_expected)
|
|
|
|
| |
| |
|
|
|
|
def test_del_frame(backend, using_copy_on_write, warn_copy_on_write):
    """
    Delete a column from a full-slice view of a frame, then write on both sides.

    Deleting "b" from the view must leave the parent intact and keep column
    "a" shared. A later write into the view is visible in the parent only
    without ``using_copy_on_write``.
    """
    dtype_backend, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = parent.copy()
    view = parent[:]

    assert np.shares_memory(get_array(parent, "a"), get_array(view, "a"))

    del view["b"]

    assert np.shares_memory(get_array(parent, "a"), get_array(view, "a"))
    tm.assert_frame_equal(parent, snapshot)
    tm.assert_frame_equal(view, snapshot[["a", "c"]])
    view._mgr._verify_integrity()

    # Writing to the parent's "b" must keep column "a" shared with the view.
    with tm.assert_cow_warning(warn_copy_on_write and dtype_backend == "numpy"):
        parent.loc[0, "b"] = 200
    assert np.shares_memory(get_array(parent, "a"), get_array(view, "a"))
    snapshot = parent.copy()

    with tm.assert_cow_warning(warn_copy_on_write):
        view.loc[0, "a"] = 100
    if not using_copy_on_write:
        assert parent.loc[0, "a"] == 100
    else:
        tm.assert_frame_equal(parent, snapshot)
|
|
|
|
def test_del_series(backend):
    """
    Delete an element from a full-slice view of a Series.

    After the deletion the view must no longer share memory with the parent,
    and the parent must be unchanged. A later write into the view must be
    visible through the view's previously fetched ``.values``.
    """
    _, _, make_ser = backend
    parent = make_ser([1, 2, 3], index=["a", "b", "c"])
    snapshot = parent.copy()
    view = parent[:]

    assert np.shares_memory(get_array(parent), get_array(view))

    del view["a"]

    assert not np.shares_memory(get_array(parent), get_array(view))
    tm.assert_series_equal(parent, snapshot)
    tm.assert_series_equal(view, snapshot[["b", "c"]])

    # The write below must land in the array fetched before it.
    raw = view.values
    view.loc["b"] = 100
    assert raw[0] == 100
|
|
|
|
| |
| |
|
|
|
|
def test_column_as_series(
    backend, using_copy_on_write, warn_copy_on_write, using_array_manager
):
    """
    Select one column as a Series with ``df["a"]`` and write into it.

    The Series initially shares memory with the frame. With
    ``using_copy_on_write`` the frame stays unchanged; otherwise the write is
    expected to reach the frame.
    """
    dtype_backend, make_df, make_ser = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = parent.copy()

    column = parent["a"]

    assert np.shares_memory(get_array(column, "a"), get_array(parent, "a"))

    if using_copy_on_write or using_array_manager:
        column[0] = 0
    elif warn_copy_on_write:
        with tm.assert_cow_warning():
            column[0] = 0
    else:
        # SettingWithCopyWarning is only expected on the numpy backend.
        expected_warning = SettingWithCopyWarning if dtype_backend == "numpy" else None
        chained = pd.option_context("chained_assignment", "warn")
        with chained, tm.assert_produces_warning(expected_warning):
            column[0] = 0

    tm.assert_series_equal(column, make_ser([0, 2, 3], name="a"))

    if using_copy_on_write:
        tm.assert_frame_equal(parent, snapshot)
        # Selecting the column again must also return the original values.
        tm.assert_series_equal(parent["a"], snapshot["a"])
    else:
        snapshot.iloc[0, 0] = 0
        tm.assert_frame_equal(parent, snapshot)
|
|
|
|
def test_column_as_series_set_with_upcast(
    backend, using_copy_on_write, using_array_manager, warn_copy_on_write
):
    """
    Select one column as a Series and assign a string into the integer data.

    On the nullable backend the assignment is expected to raise ``TypeError``
    and leave the values alone; otherwise the Series is expected to become
    object dtype. The frame stays unchanged only with ``using_copy_on_write``.
    """
    dtype_backend, make_df, make_ser = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = parent.copy()

    column = parent["a"]
    if dtype_backend == "nullable":
        with tm.assert_cow_warning(warn_copy_on_write), pytest.raises(
            TypeError, match="Invalid value"
        ):
            column[0] = "foo"
        expected = make_ser([1, 2, 3], name="a")
    elif using_copy_on_write or warn_copy_on_write or using_array_manager:
        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
            column[0] = "foo"
        expected = make_ser(["foo", 2, 3], dtype=object, name="a")
    else:
        # Either of the two warning messages satisfies the match.
        msg = "|".join(
            [
                "A value is trying to be set on a copy of a slice from a DataFrame",
                "Setting an item of incompatible dtype is deprecated",
            ]
        )
        chained = pd.option_context("chained_assignment", "warn")
        either_warning = tm.assert_produces_warning(
            (SettingWithCopyWarning, FutureWarning), match=msg
        )
        with chained, either_warning:
            column[0] = "foo"
        expected = make_ser(["foo", 2, 3], dtype=object, name="a")

    tm.assert_series_equal(column, expected)
    if using_copy_on_write:
        tm.assert_frame_equal(parent, snapshot)
        # Selecting the column again must also return the original values.
        tm.assert_series_equal(parent["a"], snapshot["a"])
    else:
        snapshot["a"] = expected
        tm.assert_frame_equal(parent, snapshot)
|
|
|
|
@pytest.mark.parametrize(
    "method",
    [
        lambda df: df["a"],
        lambda df: df.loc[:, "a"],
        lambda df: df.iloc[:, 0],
    ],
    ids=["getitem", "loc", "iloc"],
)
def test_column_as_series_no_item_cache(
    request,
    backend,
    method,
    using_copy_on_write,
    warn_copy_on_write,
    using_array_manager,
):
    """
    Select the same column twice and write into the first result.

    The two selections are expected to be distinct objects with either
    copy-on-write fixture or for the iloc case, and the same object otherwise.
    With ``using_copy_on_write`` the second selection and the frame stay
    unchanged; otherwise the second selection is expected to see the write.
    """
    dtype_backend, make_df, _ = backend
    parent = make_df({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    snapshot = parent.copy()

    first = method(parent)
    second = method(parent)

    is_iloc = "iloc" in request.node.name
    distinct_objects = using_copy_on_write or warn_copy_on_write or is_iloc
    if distinct_objects:
        assert first is not second
    else:
        assert first is second

    if using_copy_on_write or using_array_manager:
        first.iloc[0] = 0
    elif warn_copy_on_write:
        with tm.assert_cow_warning():
            first.iloc[0] = 0
    else:
        # SettingWithCopyWarning is only expected on the numpy backend.
        expected_warning = SettingWithCopyWarning if dtype_backend == "numpy" else None
        chained = pd.option_context("chained_assignment", "warn")
        with chained, tm.assert_produces_warning(expected_warning):
            first.iloc[0] = 0

    if not using_copy_on_write:
        assert second.iloc[0] == 0
    else:
        tm.assert_series_equal(second, snapshot["a"])
        tm.assert_frame_equal(parent, snapshot)
|
|
|
|
| |
|
|
|
|
def test_dataframe_add_column_from_series(backend, using_copy_on_write):
    """
    Add a Series to a frame as a new column, then write into the Series.

    The new column shares memory with the Series exactly when
    ``using_copy_on_write`` is set. The later write must not change the frame.
    """
    _, make_df, make_ser = backend
    frame = make_df({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})

    source = make_ser([10, 11, 12])
    frame["new"] = source
    shared = np.shares_memory(get_array(frame, "new"), get_array(source))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Editing the Series afterwards must leave the frame's column alone.
    source[0] = 0
    tm.assert_frame_equal(
        frame,
        make_df({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "new": [10, 11, 12]}),
    )
|
|
|
|
@pytest.mark.parametrize("val", [100, "a"])
@pytest.mark.parametrize(
    "indexer_func, indexer",
    [
        (tm.loc, (0, "a")),
        (tm.iloc, (0, 0)),
        (tm.loc, ([0], "a")),
        (tm.iloc, ([0], 0)),
        (tm.loc, (slice(None), "a")),
        (tm.iloc, (slice(None), 0)),
    ],
)
@pytest.mark.parametrize(
    "col", [[0.1, 0.2, 0.3], [7, 8, 9]], ids=["mixed-block", "single-block"]
)
def test_set_value_copy_only_necessary_column(
    using_copy_on_write, warn_copy_on_write, indexer_func, indexer, val, col
):
    """
    Set a value in column "a" of a frame that has a live ``df[:]`` view.

    With ``using_copy_on_write`` only column "a" may stop sharing memory with
    the view, and the view must keep the original values. Otherwise column
    "c" stays shared, and column "a" stays shared unless the string value
    forced it to a new array.

    The assignment is performed exactly once per test case. The previous
    ``if / if / else`` layout ran it a second time, outside the warning check,
    whenever ``val == "a"`` and ``warn_copy_on_write`` was false.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col})
    df_orig = df.copy()
    view = df[:]

    if val == "a":
        # Assigning a string into the integer column must emit a FutureWarning;
        # in warn mode the view-related warning message is accepted as well.
        if warn_copy_on_write:
            match = "incompatible dtype|Setting a value on a view"
        else:
            match = "Setting an item of incompatible dtype is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=match):
            indexer_func(df)[indexer] = val
    else:
        with tm.assert_cow_warning(warn_copy_on_write and val == 100):
            indexer_func(df)[indexer] = val

    if using_copy_on_write:
        assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
        assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
        tm.assert_frame_equal(view, df_orig)
    else:
        assert np.shares_memory(get_array(df, "c"), get_array(view, "c"))
        if val == "a":
            assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
        else:
            assert np.shares_memory(get_array(df, "a"), get_array(view, "a"))
|
|
|
|
def test_series_midx_slice(using_copy_on_write, warn_copy_on_write):
    """
    Select a first-level key from a MultiIndex Series and write into the result.

    The result shares memory with the parent. With ``using_copy_on_write`` the
    parent stays unchanged; otherwise the parent is expected to see the write.
    """

    def _index():
        return pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]])

    parent = Series([1, 2, 3], index=_index())
    snapshot = parent.copy()
    picked = parent[1]
    assert np.shares_memory(get_array(parent), get_array(picked))
    with tm.assert_cow_warning(warn_copy_on_write):
        picked.iloc[0] = 100

    if using_copy_on_write:
        expected = snapshot
    else:
        expected = Series([100, 2, 3], index=_index())
    tm.assert_series_equal(parent, expected)
|
|
|
|
def test_getitem_midx_slice(
    using_copy_on_write, warn_copy_on_write, using_array_manager
):
    """
    Select a first-level key from MultiIndex columns and write into the result.

    With ``using_copy_on_write`` the parent stays unchanged; otherwise the
    write is expected to reach the parent.
    """
    parent = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2})
    snapshot = parent.copy()
    picked = parent[("a",)]

    if using_copy_on_write:
        assert not picked._mgr._has_no_reference(0)

    if not using_array_manager:
        assert np.shares_memory(
            get_array(parent, ("a", "x")), get_array(picked, "x")
        )

    if using_copy_on_write:
        picked.iloc[0, 0] = 100
        tm.assert_frame_equal(snapshot, parent)
        return

    if warn_copy_on_write:
        with tm.assert_cow_warning():
            picked.iloc[0, 0] = 100
    else:
        chained = pd.option_context("chained_assignment", "warn")
        with chained, tm.assert_produces_warning(SettingWithCopyWarning):
            picked.iloc[0, 0] = 100
    assert parent.iloc[0, 0] == 100
|
|
|
|
def test_series_midx_tuples_slice(using_copy_on_write, warn_copy_on_write):
    """
    Select a tuple-valued first-level key from a MultiIndex Series and write.

    The result shares memory with the parent. The parent's values are only
    checked (and must be unchanged) with ``using_copy_on_write``.
    """

    def _index():
        return pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)])

    parent = Series(
        [1, 2, 3],
        index=_index(),
    )
    picked = parent[(1, 2)]
    assert np.shares_memory(get_array(parent), get_array(picked))
    with tm.assert_cow_warning(warn_copy_on_write):
        picked.iloc[0] = 100

    if not using_copy_on_write:
        return
    tm.assert_series_equal(parent, Series([1, 2, 3], index=_index()))
|
|
|
|
| def test_midx_read_only_bool_indexer(): |
| |
| def mklbl(prefix, n): |
| return [f"{prefix}{i}" for i in range(n)] |
|
|
| idx = pd.MultiIndex.from_product( |
| [mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)] |
| ) |
| cols = pd.MultiIndex.from_tuples( |
| [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"] |
| ) |
| df = DataFrame(1, index=idx, columns=cols).sort_index().sort_index(axis=1) |
|
|
| mask = df[("a", "foo")] == 1 |
| expected_mask = mask.copy() |
| result = df.loc[pd.IndexSlice[mask, :, ["C1", "C3"]], :] |
| expected = df.loc[pd.IndexSlice[:, :, ["C1", "C3"]], :] |
| tm.assert_frame_equal(result, expected) |
| tm.assert_series_equal(mask, expected_mask) |
|
|
|
|
def test_loc_enlarging_with_dataframe(using_copy_on_write):
    """
    Add new columns to a frame by assigning another frame through ``.loc``.

    With ``using_copy_on_write`` the new columns share memory with the source
    frame; otherwise column "b" must not. A later write into the enlarged
    frame must leave the source frame unchanged.
    """
    target = DataFrame({"a": [1, 2, 3]})
    source = DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
    source_before = source.copy()
    target.loc[:, ["b", "c"]] = source

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(target, "b"), get_array(source, "b"))
    else:
        for name in ("b", "c"):
            assert np.shares_memory(get_array(target, name), get_array(source, name))
        assert not target._mgr._has_no_reference(1)

    target.iloc[0, 1] = 100
    tm.assert_frame_equal(source, source_before)
|
|