Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/algorithms.py +1747 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/flags.py +117 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/roperator.py +62 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__init__.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_aggregation.py +93 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_algos.py +2041 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_common.py +267 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_downstream.py +362 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_errors.py +112 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_expressions.py +466 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_flags.py +48 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_multilevel.py +355 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_nanops.py +1274 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_optional_dependency.py +100 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_register_accessor.py +103 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_sorting.py +487 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_take.py +307 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/__init__.py +29 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_decorators.py +508 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_doctools.py +202 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_exceptions.py +103 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_print_versions.py +158 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_test_decorators.py +173 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_tester.py +53 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_validators.py +456 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/__init__.py +61 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/assume_constant_result.py +20 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/autograd_function.py +25 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/class_method.py +22 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_class_method.py +44 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_nested_function.py +41 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_nonlocal_variables.py +59 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_closed_over_variable.py +22 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_operands.py +35 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_predicate.py +25 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/constrain_as_size_example.py +23 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/constrain_as_value_example.py +26 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/decorator.py +23 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dictionary.py +17 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_assert.py +18 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_constructor.py +15 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_if_guard.py +19 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_map.py +19 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_round.py +21 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_slicing.py +15 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/fn_with_kwargs.py +30 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/list_contains.py +17 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/algorithms.py
ADDED
|
@@ -0,0 +1,1747 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Generic data algorithms. This module is experimental at the moment and not
|
| 3 |
+
intended for public consumption
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import decimal
|
| 8 |
+
import operator
|
| 9 |
+
from textwrap import dedent
|
| 10 |
+
from typing import (
|
| 11 |
+
TYPE_CHECKING,
|
| 12 |
+
Literal,
|
| 13 |
+
cast,
|
| 14 |
+
)
|
| 15 |
+
import warnings
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
|
| 19 |
+
from pandas._libs import (
|
| 20 |
+
algos,
|
| 21 |
+
hashtable as htable,
|
| 22 |
+
iNaT,
|
| 23 |
+
lib,
|
| 24 |
+
)
|
| 25 |
+
from pandas._typing import (
|
| 26 |
+
AnyArrayLike,
|
| 27 |
+
ArrayLike,
|
| 28 |
+
AxisInt,
|
| 29 |
+
DtypeObj,
|
| 30 |
+
TakeIndexer,
|
| 31 |
+
npt,
|
| 32 |
+
)
|
| 33 |
+
from pandas.util._decorators import doc
|
| 34 |
+
from pandas.util._exceptions import find_stack_level
|
| 35 |
+
|
| 36 |
+
from pandas.core.dtypes.cast import (
|
| 37 |
+
construct_1d_object_array_from_listlike,
|
| 38 |
+
np_find_common_type,
|
| 39 |
+
)
|
| 40 |
+
from pandas.core.dtypes.common import (
|
| 41 |
+
ensure_float64,
|
| 42 |
+
ensure_object,
|
| 43 |
+
ensure_platform_int,
|
| 44 |
+
is_array_like,
|
| 45 |
+
is_bool_dtype,
|
| 46 |
+
is_complex_dtype,
|
| 47 |
+
is_dict_like,
|
| 48 |
+
is_extension_array_dtype,
|
| 49 |
+
is_float_dtype,
|
| 50 |
+
is_integer,
|
| 51 |
+
is_integer_dtype,
|
| 52 |
+
is_list_like,
|
| 53 |
+
is_object_dtype,
|
| 54 |
+
is_signed_integer_dtype,
|
| 55 |
+
needs_i8_conversion,
|
| 56 |
+
)
|
| 57 |
+
from pandas.core.dtypes.concat import concat_compat
|
| 58 |
+
from pandas.core.dtypes.dtypes import (
|
| 59 |
+
BaseMaskedDtype,
|
| 60 |
+
CategoricalDtype,
|
| 61 |
+
ExtensionDtype,
|
| 62 |
+
NumpyEADtype,
|
| 63 |
+
)
|
| 64 |
+
from pandas.core.dtypes.generic import (
|
| 65 |
+
ABCDatetimeArray,
|
| 66 |
+
ABCExtensionArray,
|
| 67 |
+
ABCIndex,
|
| 68 |
+
ABCMultiIndex,
|
| 69 |
+
ABCSeries,
|
| 70 |
+
ABCTimedeltaArray,
|
| 71 |
+
)
|
| 72 |
+
from pandas.core.dtypes.missing import (
|
| 73 |
+
isna,
|
| 74 |
+
na_value_for_dtype,
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
from pandas.core.array_algos.take import take_nd
|
| 78 |
+
from pandas.core.construction import (
|
| 79 |
+
array as pd_array,
|
| 80 |
+
ensure_wrapped_if_datetimelike,
|
| 81 |
+
extract_array,
|
| 82 |
+
)
|
| 83 |
+
from pandas.core.indexers import validate_indices
|
| 84 |
+
|
| 85 |
+
if TYPE_CHECKING:
|
| 86 |
+
from pandas._typing import (
|
| 87 |
+
ListLike,
|
| 88 |
+
NumpySorter,
|
| 89 |
+
NumpyValueArrayLike,
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
from pandas import (
|
| 93 |
+
Categorical,
|
| 94 |
+
Index,
|
| 95 |
+
Series,
|
| 96 |
+
)
|
| 97 |
+
from pandas.core.arrays import (
|
| 98 |
+
BaseMaskedArray,
|
| 99 |
+
ExtensionArray,
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# --------------- #
|
| 104 |
+
# dtype access #
|
| 105 |
+
# --------------- #
|
| 106 |
+
def _ensure_data(values: ArrayLike) -> np.ndarray:
|
| 107 |
+
"""
|
| 108 |
+
routine to ensure that our data is of the correct
|
| 109 |
+
input dtype for lower-level routines
|
| 110 |
+
|
| 111 |
+
This will coerce:
|
| 112 |
+
- ints -> int64
|
| 113 |
+
- uint -> uint64
|
| 114 |
+
- bool -> uint8
|
| 115 |
+
- datetimelike -> i8
|
| 116 |
+
- datetime64tz -> i8 (in local tz)
|
| 117 |
+
- categorical -> codes
|
| 118 |
+
|
| 119 |
+
Parameters
|
| 120 |
+
----------
|
| 121 |
+
values : np.ndarray or ExtensionArray
|
| 122 |
+
|
| 123 |
+
Returns
|
| 124 |
+
-------
|
| 125 |
+
np.ndarray
|
| 126 |
+
"""
|
| 127 |
+
|
| 128 |
+
if not isinstance(values, ABCMultiIndex):
|
| 129 |
+
# extract_array would raise
|
| 130 |
+
values = extract_array(values, extract_numpy=True)
|
| 131 |
+
|
| 132 |
+
if is_object_dtype(values.dtype):
|
| 133 |
+
return ensure_object(np.asarray(values))
|
| 134 |
+
|
| 135 |
+
elif isinstance(values.dtype, BaseMaskedDtype):
|
| 136 |
+
# i.e. BooleanArray, FloatingArray, IntegerArray
|
| 137 |
+
values = cast("BaseMaskedArray", values)
|
| 138 |
+
if not values._hasna:
|
| 139 |
+
# No pd.NAs -> We can avoid an object-dtype cast (and copy) GH#41816
|
| 140 |
+
# recurse to avoid re-implementing logic for eg bool->uint8
|
| 141 |
+
return _ensure_data(values._data)
|
| 142 |
+
return np.asarray(values)
|
| 143 |
+
|
| 144 |
+
elif isinstance(values.dtype, CategoricalDtype):
|
| 145 |
+
# NB: cases that go through here should NOT be using _reconstruct_data
|
| 146 |
+
# on the back-end.
|
| 147 |
+
values = cast("Categorical", values)
|
| 148 |
+
return values.codes
|
| 149 |
+
|
| 150 |
+
elif is_bool_dtype(values.dtype):
|
| 151 |
+
if isinstance(values, np.ndarray):
|
| 152 |
+
# i.e. actually dtype == np.dtype("bool")
|
| 153 |
+
return np.asarray(values).view("uint8")
|
| 154 |
+
else:
|
| 155 |
+
# e.g. Sparse[bool, False] # TODO: no test cases get here
|
| 156 |
+
return np.asarray(values).astype("uint8", copy=False)
|
| 157 |
+
|
| 158 |
+
elif is_integer_dtype(values.dtype):
|
| 159 |
+
return np.asarray(values)
|
| 160 |
+
|
| 161 |
+
elif is_float_dtype(values.dtype):
|
| 162 |
+
# Note: checking `values.dtype == "float128"` raises on Windows and 32bit
|
| 163 |
+
# error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype, dtype[Any]]"
|
| 164 |
+
# has no attribute "itemsize"
|
| 165 |
+
if values.dtype.itemsize in [2, 12, 16]: # type: ignore[union-attr]
|
| 166 |
+
# we dont (yet) have float128 hashtable support
|
| 167 |
+
return ensure_float64(values)
|
| 168 |
+
return np.asarray(values)
|
| 169 |
+
|
| 170 |
+
elif is_complex_dtype(values.dtype):
|
| 171 |
+
return cast(np.ndarray, values)
|
| 172 |
+
|
| 173 |
+
# datetimelike
|
| 174 |
+
elif needs_i8_conversion(values.dtype):
|
| 175 |
+
npvalues = values.view("i8")
|
| 176 |
+
npvalues = cast(np.ndarray, npvalues)
|
| 177 |
+
return npvalues
|
| 178 |
+
|
| 179 |
+
# we have failed, return object
|
| 180 |
+
values = np.asarray(values, dtype=object)
|
| 181 |
+
return ensure_object(values)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def _reconstruct_data(
|
| 185 |
+
values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike
|
| 186 |
+
) -> ArrayLike:
|
| 187 |
+
"""
|
| 188 |
+
reverse of _ensure_data
|
| 189 |
+
|
| 190 |
+
Parameters
|
| 191 |
+
----------
|
| 192 |
+
values : np.ndarray or ExtensionArray
|
| 193 |
+
dtype : np.dtype or ExtensionDtype
|
| 194 |
+
original : AnyArrayLike
|
| 195 |
+
|
| 196 |
+
Returns
|
| 197 |
+
-------
|
| 198 |
+
ExtensionArray or np.ndarray
|
| 199 |
+
"""
|
| 200 |
+
if isinstance(values, ABCExtensionArray) and values.dtype == dtype:
|
| 201 |
+
# Catch DatetimeArray/TimedeltaArray
|
| 202 |
+
return values
|
| 203 |
+
|
| 204 |
+
if not isinstance(dtype, np.dtype):
|
| 205 |
+
# i.e. ExtensionDtype; note we have ruled out above the possibility
|
| 206 |
+
# that values.dtype == dtype
|
| 207 |
+
cls = dtype.construct_array_type()
|
| 208 |
+
|
| 209 |
+
values = cls._from_sequence(values, dtype=dtype)
|
| 210 |
+
|
| 211 |
+
else:
|
| 212 |
+
values = values.astype(dtype, copy=False)
|
| 213 |
+
|
| 214 |
+
return values
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def _ensure_arraylike(values, func_name: str) -> ArrayLike:
|
| 218 |
+
"""
|
| 219 |
+
ensure that we are arraylike if not already
|
| 220 |
+
"""
|
| 221 |
+
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
|
| 222 |
+
# GH#52986
|
| 223 |
+
if func_name != "isin-targets":
|
| 224 |
+
# Make an exception for the comps argument in isin.
|
| 225 |
+
warnings.warn(
|
| 226 |
+
f"{func_name} with argument that is not not a Series, Index, "
|
| 227 |
+
"ExtensionArray, or np.ndarray is deprecated and will raise in a "
|
| 228 |
+
"future version.",
|
| 229 |
+
FutureWarning,
|
| 230 |
+
stacklevel=find_stack_level(),
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
inferred = lib.infer_dtype(values, skipna=False)
|
| 234 |
+
if inferred in ["mixed", "string", "mixed-integer"]:
|
| 235 |
+
# "mixed-integer" to ensure we do not cast ["ss", 42] to str GH#22160
|
| 236 |
+
if isinstance(values, tuple):
|
| 237 |
+
values = list(values)
|
| 238 |
+
values = construct_1d_object_array_from_listlike(values)
|
| 239 |
+
else:
|
| 240 |
+
values = np.asarray(values)
|
| 241 |
+
return values
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
_hashtables = {
|
| 245 |
+
"complex128": htable.Complex128HashTable,
|
| 246 |
+
"complex64": htable.Complex64HashTable,
|
| 247 |
+
"float64": htable.Float64HashTable,
|
| 248 |
+
"float32": htable.Float32HashTable,
|
| 249 |
+
"uint64": htable.UInt64HashTable,
|
| 250 |
+
"uint32": htable.UInt32HashTable,
|
| 251 |
+
"uint16": htable.UInt16HashTable,
|
| 252 |
+
"uint8": htable.UInt8HashTable,
|
| 253 |
+
"int64": htable.Int64HashTable,
|
| 254 |
+
"int32": htable.Int32HashTable,
|
| 255 |
+
"int16": htable.Int16HashTable,
|
| 256 |
+
"int8": htable.Int8HashTable,
|
| 257 |
+
"string": htable.StringHashTable,
|
| 258 |
+
"object": htable.PyObjectHashTable,
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def _get_hashtable_algo(values: np.ndarray):
|
| 263 |
+
"""
|
| 264 |
+
Parameters
|
| 265 |
+
----------
|
| 266 |
+
values : np.ndarray
|
| 267 |
+
|
| 268 |
+
Returns
|
| 269 |
+
-------
|
| 270 |
+
htable : HashTable subclass
|
| 271 |
+
values : ndarray
|
| 272 |
+
"""
|
| 273 |
+
values = _ensure_data(values)
|
| 274 |
+
|
| 275 |
+
ndtype = _check_object_for_strings(values)
|
| 276 |
+
hashtable = _hashtables[ndtype]
|
| 277 |
+
return hashtable, values
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def _check_object_for_strings(values: np.ndarray) -> str:
|
| 281 |
+
"""
|
| 282 |
+
Check if we can use string hashtable instead of object hashtable.
|
| 283 |
+
|
| 284 |
+
Parameters
|
| 285 |
+
----------
|
| 286 |
+
values : ndarray
|
| 287 |
+
|
| 288 |
+
Returns
|
| 289 |
+
-------
|
| 290 |
+
str
|
| 291 |
+
"""
|
| 292 |
+
ndtype = values.dtype.name
|
| 293 |
+
if ndtype == "object":
|
| 294 |
+
# it's cheaper to use a String Hash Table than Object; we infer
|
| 295 |
+
# including nulls because that is the only difference between
|
| 296 |
+
# StringHashTable and ObjectHashtable
|
| 297 |
+
if lib.is_string_array(values, skipna=False):
|
| 298 |
+
ndtype = "string"
|
| 299 |
+
return ndtype
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
# --------------- #
|
| 303 |
+
# top-level algos #
|
| 304 |
+
# --------------- #
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def unique(values):
    """
    Return the distinct values of ``values`` using a hash table.

    Values come back in the order in which they first appear; the result is
    NOT sorted.  NA values are included.

    Significantly faster than numpy.unique for long enough sequences.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    numpy.ndarray or ExtensionArray

        The return can be:

        * Index : when the input is an Index
        * Categorical : when the input is a Categorical dtype
        * ndarray : when the input is a Series/ndarray

    See Also
    --------
    Index.unique : Return unique values from an Index.
    Series.unique : Return unique values of Series object.

    Examples
    --------
    >>> pd.unique(pd.Series([2, 1, 3, 3]))
    array([2, 1, 3])

    >>> pd.unique(pd.Series([2] + [1] * 5))
    array([2, 1])

    >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.unique(
    ...     pd.Series(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[ns, US/Eastern]

    >>> pd.unique(
    ...     pd.Index(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    DatetimeIndex(['2016-01-01 00:00:00-05:00'],
            dtype='datetime64[ns, US/Eastern]',
            freq=None)

    >>> pd.unique(np.array(list("baabc"), dtype="O"))
    array(['b', 'a', 'c'], dtype=object)

    An unordered Categorical will return categories in the
    order of appearance.

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    An ordered Categorical preserves the category ordering.

    >>> pd.unique(
    ...     pd.Series(
    ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ...     )
    ... )
    ['b', 'a', 'c']
    Categories (3, object): ['a' < 'b' < 'c']

    An array of tuples

    >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)
    array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
    """
    # All of the work happens in unique_with_mask; no mask is supplied here.
    return unique_with_mask(values)
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
def nunique_ints(values: ArrayLike) -> int:
    """
    Count the distinct values of an integer array-like.

    Significantly faster than pandas.unique for long enough sequences.
    No checks are done to ensure input is integral.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    int : The number of unique values in ``values``
    """
    if not len(values):
        return 0
    # bincount requires intp
    flat = _ensure_data(values).ravel().astype("intp")
    # every value that occurs at least once leaves a non-zero bin
    occupied = np.bincount(flat) != 0
    return occupied.sum()
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None):
|
| 428 |
+
"""See algorithms.unique for docs. Takes a mask for masked arrays."""
|
| 429 |
+
values = _ensure_arraylike(values, func_name="unique")
|
| 430 |
+
|
| 431 |
+
if isinstance(values.dtype, ExtensionDtype):
|
| 432 |
+
# Dispatch to extension dtype's unique.
|
| 433 |
+
return values.unique()
|
| 434 |
+
|
| 435 |
+
original = values
|
| 436 |
+
hashtable, values = _get_hashtable_algo(values)
|
| 437 |
+
|
| 438 |
+
table = hashtable(len(values))
|
| 439 |
+
if mask is None:
|
| 440 |
+
uniques = table.unique(values)
|
| 441 |
+
uniques = _reconstruct_data(uniques, original.dtype, original)
|
| 442 |
+
return uniques
|
| 443 |
+
|
| 444 |
+
else:
|
| 445 |
+
uniques, mask = table.unique(values, mask=mask)
|
| 446 |
+
uniques = _reconstruct_data(uniques, original.dtype, original)
|
| 447 |
+
assert mask is not None # for mypy
|
| 448 |
+
return uniques, mask.astype("bool")
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
unique1d = unique
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
_MINIMUM_COMP_ARR_LEN = 1_000_000
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
    """
    Test each element of `comps` for membership in `values`.

    Parameters
    ----------
    comps : list-like
    values : list-like

    Returns
    -------
    ndarray[bool]
        Same length as `comps`.

    Raises
    ------
    TypeError
        If either `comps` or `values` is not list-like.
    """
    for arg in (comps, values):
        if not is_list_like(arg):
            raise TypeError(
                "only list-like objects are allowed to be passed "
                f"to isin(), you passed a `{type(arg).__name__}`"
            )

    is_known_container = isinstance(
        values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)
    )
    if not is_known_container:
        as_list = list(values)
        values = _ensure_arraylike(as_list, func_name="isin-targets")

        if (
            len(values) > 0
            and values.dtype.kind in "iufcb"
            and not is_signed_integer_dtype(comps)
        ):
            # GH#46485 Use object to avoid upcast to float64 later
            # TODO: Share with _find_common_type_compat
            values = construct_1d_object_array_from_listlike(as_list)

    elif isinstance(values, ABCMultiIndex):
        # extract_array would raise on a MultiIndex
        values = np.array(values)
    else:
        values = extract_array(values, extract_numpy=True, extract_range=True)

    comps_array = extract_array(
        _ensure_arraylike(comps, func_name="isin"), extract_numpy=True
    )

    if not isinstance(comps_array, np.ndarray):
        # i.e. Extension Array
        return comps_array.isin(values)

    if needs_i8_conversion(comps_array.dtype):
        # Dispatch to DatetimeLikeArrayMixin.isin
        return pd_array(comps_array).isin(values)

    if needs_i8_conversion(values.dtype):
        if not is_object_dtype(comps_array.dtype):
            # e.g. comps_array are integers and values are datetime64s
            # TODO: not quite right ... Sparse/Categorical
            return np.zeros(comps_array.shape, dtype=bool)
        return isin(comps_array, values.astype(object))

    if isinstance(values.dtype, ExtensionDtype):
        return isin(np.asarray(comps_array), np.asarray(values))

    # GH16012
    # Ensure np.isin doesn't get object types or it *may* throw an exception.
    # Although a hashmap has O(1) look-up (vs. O(logn) in a sorted array),
    # np.isin is faster when only a few values are searched for.
    prefer_np_isin = (
        len(comps_array) > _MINIMUM_COMP_ARR_LEN
        and len(values) <= 26
        and comps_array.dtype != object
    )
    if not prefer_np_isin:
        common = np_find_common_type(values.dtype, comps_array.dtype)
        values = values.astype(common, copy=False)
        comps_array = comps_array.astype(common, copy=False)
        return htable.ismember(comps_array, values)

    # If the values include nan we need to check for nan explicitly,
    # since np.nan is not equal to np.nan.
    values_have_na = isna(values).any()
    found = np.isin(comps_array, values).ravel()
    if values_have_na:
        return np.logical_or(found, np.isnan(comps_array))
    return found
|
| 546 |
+
|
| 547 |
+
|
| 548 |
+
def factorize_array(
    values: np.ndarray,
    use_na_sentinel: bool = True,
    size_hint: int | None = None,
    na_value: object = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[npt.NDArray[np.intp], np.ndarray]:
    """
    Encode a numpy array as integer codes plus its unique values.

    No type coercion or unboxing is performed before factorization.

    Parameters
    ----------
    values : ndarray
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    size_hint : int, optional
        Initial size handed to the hashtable; falls back to ``len(values)``
        when not given (or zero).
    na_value : object, optional
        A value in `values` to consider missing. Note: only use this
        parameter when you know that you don't have any values pandas would
        consider missing in the array (NaN for float data, iNaT for
        datetimes, etc.). Replaced by ``iNaT`` for datetime64/timedelta64
        input.
    mask : ndarray[bool], optional
        If not None, the mask is used as indicator for missing values
        (True = missing, False = valid) instead of `na_value` or
        condition "val != val".

    Returns
    -------
    codes : ndarray[np.intp]
    uniques : ndarray
    """
    if values.dtype.kind in "mM":
        # _get_hashtable_algo will cast dt64/td64 to i8 via _ensure_data, so
        # na_value has to follow suit. This assumes the passed na_value is an
        # appropriately-typed NaT.
        # e.g. test_where_datetimelike_categorical
        na_value = iNaT

    table_cls, data = _get_hashtable_algo(values)
    table = table_cls(size_hint or len(data))

    raw_uniques, raw_codes = table.factorize(
        data,
        na_sentinel=-1,
        na_value=na_value,
        mask=mask,
        ignore_na=use_na_sentinel,
    )

    # re-cast e.g. i8->dt64/td64, uint8->bool
    uniques = _reconstruct_data(raw_uniques, values.dtype, values)
    codes = ensure_platform_int(raw_codes)
    return codes, uniques
|
| 608 |
+
|
| 609 |
+
|
| 610 |
+
@doc(
    values=dedent(
        """\
    values : sequence
        A 1-D sequence. Sequences that aren't pandas objects are
        coerced to ndarrays before factorization.
    """
    ),
    sort=dedent(
        """\
    sort : bool, default False
        Sort `uniques` and shuffle `codes` to maintain the
        relationship.
    """
    ),
    size_hint=dedent(
        """\
    size_hint : int, optional
        Hint to the hashtable sizer.
    """
    ),
)
def factorize(
    values,
    sort: bool = False,
    use_na_sentinel: bool = True,
    size_hint: int | None = None,
) -> tuple[np.ndarray, np.ndarray | Index]:
    """
    Encode the object as an enumerated type or categorical variable.

    This method is useful for obtaining a numeric representation of an
    array when all that matters is identifying distinct values. `factorize`
    is available as both a top-level function :func:`pandas.factorize`,
    and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.

    Parameters
    ----------
    {values}{sort}
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.

        .. versionadded:: 1.5.0
    {size_hint}\

    Returns
    -------
    codes : ndarray
        An integer ndarray that's an indexer into `uniques`.
        ``uniques.take(codes)`` will have the same values as `values`.
    uniques : ndarray, Index, or Categorical
        The unique valid values. When `values` is Categorical, `uniques`
        is a Categorical. When `values` is some other pandas object, an
        `Index` is returned. Otherwise, a 1-D ndarray is returned.

        .. note::

           Even if there's a missing value in `values`, `uniques` will
           *not* contain an entry for it.

    See Also
    --------
    cut : Discretize continuous-valued array.
    unique : Find the unique value in an array.

    Notes
    -----
    Reference :ref:`the user guide <reshaping.factorize>` for more examples.

    Examples
    --------
    These examples all show factorize as a top-level method like
    ``pd.factorize(values)``. The results are identical for methods like
    :meth:`Series.factorize`.

    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"))
    >>> codes
    array([0, 0, 1, 2, 0])
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    With ``sort=True``, the `uniques` will be sorted, and `codes` will be
    shuffled so that the relationship is maintained.

    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"),
    ...                               sort=True)
    >>> codes
    array([1, 1, 0, 2, 1])
    >>> uniques
    array(['a', 'b', 'c'], dtype=object)

    When ``use_na_sentinel=True`` (the default), missing values are indicated in
    the `codes` with the sentinel value ``-1`` and missing values are not
    included in `uniques`.

    >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"))
    >>> codes
    array([ 0, -1,  1,  2,  0])
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    Thus far, we've only factorized lists (which are internally coerced to
    NumPy arrays). When factorizing pandas objects, the type of `uniques`
    will differ. For Categoricals, a `Categorical` is returned.

    >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1])
    >>> uniques
    ['a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Notice that ``'b'`` is in ``uniques.categories``, despite not being
    present in ``cat.values``.

    For all other pandas objects, an Index of the appropriate type is
    returned.

    >>> cat = pd.Series(['a', 'a', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1])
    >>> uniques
    Index(['a', 'c'], dtype='object')

    If NaN is in the values, and we want to include NaN in the uniques of the
    values, it can be achieved by setting ``use_na_sentinel=False``.

    >>> values = np.array([1, 2, 1, np.nan])
    >>> codes, uniques = pd.factorize(values)  # default: use_na_sentinel=True
    >>> codes
    array([ 0,  1,  0, -1])
    >>> uniques
    array([1., 2.])

    >>> codes, uniques = pd.factorize(values, use_na_sentinel=False)
    >>> codes
    array([0, 1, 0, 2])
    >>> uniques
    array([ 1.,  2., nan])
    """
    # Implementation notes: this function has three responsibilities
    # 1.) coerce the data to array-like (ndarray, Index, extension array)
    # 2.) factorize into codes and uniques
    # 3.) possibly box the uniques in an Index
    #
    # Only step 2 is dispatched to extension types (like Categorical); all
    # data coercion, sorting and boxing should happen here.
    if isinstance(values, (ABCIndex, ABCSeries)):
        return values.factorize(sort=sort, use_na_sentinel=use_na_sentinel)

    values = _ensure_arraylike(values, func_name="factorize")
    original = values

    is_dta_or_tda = isinstance(values, (ABCDatetimeArray, ABCTimedeltaArray))
    if is_dta_or_tda and values.freq is not None:
        # The presence of 'freq' means we can fast-path sorting and know there
        # aren't NAs
        codes, uniques = values.factorize(sort=sort)
        return codes, uniques

    if isinstance(values, np.ndarray):
        values = np.asarray(values)  # convert DTA/TDA/MultiIndex

        if not use_na_sentinel and values.dtype == object:
            # factorize can now handle differentiating various types of null
            # values, which can only occur when the array has object dtype.
            # However, for backwards compatibility we only use the null for
            # the provided dtype. This may be revisited in the future, see
            # GH#48476.
            missing = isna(values)
            if missing.any():
                fill = na_value_for_dtype(values.dtype, compat=False)
                # np.where builds a new array, so a (potentially
                # user-provided) input is left unmodified.
                values = np.where(missing, fill, values)

        codes, uniques = factorize_array(
            values,
            use_na_sentinel=use_na_sentinel,
            size_hint=size_hint,
        )
    else:
        # i.e. ExtensionArray
        codes, uniques = values.factorize(use_na_sentinel=use_na_sentinel)

    if sort and len(uniques) > 0:
        uniques, codes = safe_sort(
            uniques,
            codes,
            use_na_sentinel=use_na_sentinel,
            assume_unique=True,
            verify=False,
        )

    uniques = _reconstruct_data(uniques, original.dtype, original)

    return codes, uniques
|
| 813 |
+
|
| 814 |
+
|
| 815 |
+
def value_counts(
    values,
    sort: bool = True,
    ascending: bool = False,
    normalize: bool = False,
    bins=None,
    dropna: bool = True,
) -> Series:
    """
    Compute a histogram of the counts of non-null values.

    Deprecated entry point: emits a ``FutureWarning`` and then forwards all
    arguments unchanged to ``value_counts_internal``.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : bool, default True
        Sort by values
    ascending : bool, default False
        Sort in ascending order
    normalize: bool, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data
    dropna : bool, default True
        Don't include counts of NaN

    Returns
    -------
    Series
    """
    # GH#53493
    deprecation_msg = (
        "pandas.value_counts is deprecated and will be removed in a "
        "future version. Use pd.Series(obj).value_counts() instead."
    )
    warnings.warn(deprecation_msg, FutureWarning, stacklevel=find_stack_level())

    result = value_counts_internal(
        values,
        sort=sort,
        ascending=ascending,
        normalize=normalize,
        bins=bins,
        dropna=dropna,
    )
    return result
|
| 860 |
+
|
| 861 |
+
|
| 862 |
+
def value_counts_internal(
    values,
    sort: bool = True,
    ascending: bool = False,
    normalize: bool = False,
    bins=None,
    dropna: bool = True,
) -> Series:
    """
    Count occurrences of each value (or bin) and return them as a Series.

    This is what ``value_counts`` delegates to after emitting its
    deprecation warning.

    Parameters
    ----------
    values : array-like
    sort : bool, default True
        Sort the result by count.
    ascending : bool, default False
        Sort in ascending order.
    normalize : bool, default False
        If True, divide the counts by their total; the result is then named
        "proportion" instead of "count".
    bins : optional
        If given, `values` are first grouped with ``cut(values, bins,
        include_lowest=True)`` and the bins are counted instead.
    dropna : bool, default True
        Don't include counts of NaN.

    Returns
    -------
    Series

    Raises
    ------
    TypeError
        If `bins` is given and ``cut`` raises a TypeError for `values`.
    """
    from pandas import (
        Index,
        Series,
    )

    index_name = getattr(values, "name", None)
    name = "proportion" if normalize else "count"

    if bins is not None:
        from pandas.core.reshape.tile import cut

        to_bin = values._values if isinstance(values, Series) else values

        try:
            binned = cut(to_bin, bins, include_lowest=True)
        except TypeError as err:
            raise TypeError("bins argument only works with numeric data.") from err

        # Count per bin, drop the null bucket from the index and order the
        # remaining intervals.
        result = binned.value_counts(dropna=dropna)
        result.name = name
        result = result[result.index.notna()]
        result.index = result.index.astype("interval")
        result = result.sort_index()

        # With dropna and not a single counted value, return an empty result.
        if dropna and (result._values == 0).all():
            result = result.iloc[0:0]

        # Normalization divides by the full length (regardless of dropna).
        counts = np.array([len(binned)])

    elif is_extension_array_dtype(values):
        # handle Categorical and sparse,
        result = Series(values, copy=False)._values.value_counts(dropna=dropna)
        result.name = name
        result.index.name = index_name
        counts = result._values
        if not isinstance(counts, np.ndarray):
            # e.g. ArrowExtensionArray
            counts = np.asarray(counts)

    elif isinstance(values, ABCMultiIndex):
        # GH49558
        all_levels = list(range(values.nlevels))
        grouped = Series(index=values, name=name).groupby(
            level=all_levels, dropna=dropna
        )
        result = grouped.size()
        result.index.names = values.names
        counts = result._values

    else:
        arr = _ensure_arraylike(values, func_name="value_counts")
        keys, counts, _ = value_counts_arraylike(arr, dropna)
        if keys.dtype == np.float16:
            keys = keys.astype(np.float32)

        # For backwards compatibility, Index does its normal type inference,
        # _except_ when it infers bool from object.
        index = Index(keys)
        if index.dtype == bool and keys.dtype == object:
            index = index.astype(object)
        elif (
            index.dtype != keys.dtype  # noqa: PLR1714  # # pylint: disable=R1714
            and index.dtype != "string[pyarrow_numpy]"
        ):
            warnings.warn(
                # GH#56161
                "The behavior of value_counts with object-dtype is deprecated. "
                "In a future version, this will *not* perform dtype inference "
                "on the resulting index. To retain the old behavior, use "
                "`result.index = result.index.infer_objects()`",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        index.name = index_name

        result = Series(counts, index=index, name=name, copy=False)

    if sort:
        result = result.sort_values(ascending=ascending)

    if normalize:
        result = result / counts.sum()

    return result
|
| 960 |
+
|
| 961 |
+
|
| 962 |
+
# Called once from SparseArray, otherwise could be private
|
| 963 |
+
def value_counts_arraylike(
    values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = None
) -> tuple[ArrayLike, npt.NDArray[np.int64], int]:
    """
    Count the occurrences of each distinct value via the hashtable routine.

    Parameters
    ----------
    values : np.ndarray
    dropna : bool
    mask : np.ndarray[bool] or None, default None

    Returns
    -------
    uniques : np.ndarray
        Distinct values, converted back to the dtype of `values`.
    counts : np.ndarray[np.int64]
    na_counter : int
        Third value returned by ``htable.value_count``, passed through as-is.
    """
    data = _ensure_data(values)
    keys, counts, na_counter = htable.value_count(data, dropna, mask=mask)

    # datetime, timedelta, or period: when dropping NAs, also filter out the
    # iNaT key.
    if needs_i8_conversion(values.dtype) and dropna:
        not_nat = keys != iNaT
        keys, counts = keys[not_nat], counts[not_nat]

    return _reconstruct_data(keys, values.dtype, values), counts, na_counter
|
| 992 |
+
|
| 993 |
+
|
| 994 |
+
def duplicated(
    values: ArrayLike,
    keep: Literal["first", "last", False] = "first",
    mask: npt.NDArray[np.bool_] | None = None,
) -> npt.NDArray[np.bool_]:
    """
    Flag the duplicate entries of an array.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array over which to check for duplicate values.
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.
    mask : ndarray[bool], optional
        array indicating which elements to exclude from checking

    Returns
    -------
    duplicated : ndarray[bool]
    """
    # The hashtable routine works on the converted data from _ensure_data.
    return htable.duplicated(_ensure_data(values), keep=keep, mask=mask)
|
| 1021 |
+
|
| 1022 |
+
|
| 1023 |
+
def mode(
    values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None
) -> ArrayLike:
    """
    Compute the mode(s) of an array.

    Parameters
    ----------
    values : array-like
        Array whose mode(s) are computed.
    dropna : bool, default True
        Don't consider counts of NaN/NaT.
    mask : ndarray[bool], optional
        Forwarded to the hashtable ``mode`` routine. Not used for
        datetime-like input, which dispatches to the array's ``_mode``.

    Returns
    -------
    np.ndarray or ExtensionArray
        If the hashtable routine returns a result mask, the unsorted
        ``(modes, mask)`` pair is returned instead.
    """
    original = _ensure_arraylike(values, func_name="mode")

    if needs_i8_conversion(original.dtype):
        # Got here with ndarray; dispatch to DatetimeArray/TimedeltaArray.
        wrapped = ensure_wrapped_if_datetimelike(original)
        wrapped = cast("ExtensionArray", wrapped)
        return wrapped._mode(dropna=dropna)

    data = _ensure_data(original)
    modes, res_mask = htable.mode(data, dropna=dropna, mask=mask)
    if res_mask is not None:
        return modes, res_mask  # type: ignore[return-value]

    try:
        modes = np.sort(modes)
    except TypeError as err:
        # Unsortable values: warn and keep the order the hashtable produced.
        warnings.warn(
            f"Unable to sort modes: {err}",
            stacklevel=find_stack_level(),
        )

    return _reconstruct_data(modes, original.dtype, original)
|
| 1065 |
+
|
| 1066 |
+
|
| 1067 |
+
def rank(
    values: ArrayLike,
    axis: AxisInt = 0,
    method: str = "average",
    na_option: str = "keep",
    ascending: bool = True,
    pct: bool = False,
) -> npt.NDArray[np.float64]:
    """
    Rank the values along a given axis.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array whose values will be ranked. The number of dimensions in this
        array must not exceed 2.
    axis : int, default 0
        Axis over which to perform rankings. Only used for 2-D input.
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        The method by which ties are broken during the ranking.
    na_option : {'keep', 'top'}, default 'keep'
        The method by which NaNs are placed in the ranking.
        - ``keep``: rank each NaN value with a NaN ranking
        - ``top``: replace each NaN with either +/- inf so that they
                   are ranked at the top
    ascending : bool, default True
        Whether or not the elements should be ranked in ascending order.
    pct : bool, default False
        Whether to return the rankings in percentile form
        (e.g. 0.333..., 0.666..., 1) instead of integer form (e.g. 1, 2, 3).

    Returns
    -------
    ndarray[float64]

    Raises
    ------
    TypeError
        If `values` is neither 1- nor 2-dimensional.
    """
    # Checked on the incoming dtype, before _ensure_data converts the values.
    datetimelike = needs_i8_conversion(values.dtype)
    data = _ensure_data(values)

    # Keyword arguments common to the 1-D and 2-D ranking routines.
    shared_kwargs = {
        "is_datetimelike": datetimelike,
        "ties_method": method,
        "ascending": ascending,
        "na_option": na_option,
        "pct": pct,
    }

    if data.ndim == 1:
        return algos.rank_1d(data, **shared_kwargs)
    if data.ndim == 2:
        return algos.rank_2d(data, axis=axis, **shared_kwargs)

    raise TypeError("Array with ndim > 2 are not supported.")
|
| 1124 |
+
|
| 1125 |
+
|
| 1126 |
+
# ---- #
|
| 1127 |
+
# take #
|
| 1128 |
+
# ---- #
|
| 1129 |
+
|
| 1130 |
+
|
| 1131 |
+
def take(
    arr,
    indices: TakeIndexer,
    axis: AxisInt = 0,
    allow_fill: bool = False,
    fill_value=None,
):
    """
    Take elements from an array.

    Parameters
    ----------
    arr : array-like or scalar value
        Non array-likes (sequences/scalars without a dtype) are coerced
        to an ndarray.

        .. deprecated:: 2.1.0
            Passing an argument other than a numpy.ndarray, ExtensionArray,
            Index, or Series is deprecated.

    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    axis : int, default 0
        The axis over which to select values.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to :func:`numpy.take`.

        * True: ``-1`` in `indices` indicates a missing value, which is
          set to `fill_value`. Any other negative values raise a
          ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type (``self.dtype.na_value``) is used.

        For multi-dimensional `arr`, each *element* is filled with
        `fill_value`.

    Returns
    -------
    ndarray or ExtensionArray
        Same type as the input.

    Raises
    ------
    IndexError
        When `indices` is out of bounds for the array.
    ValueError
        When the indexer contains negative values other than ``-1``
        and `allow_fill` is True.

    Notes
    -----
    When `allow_fill` is False, `indices` may be whatever dimensionality
    is accepted by NumPy for `arr`.

    When `allow_fill` is True, `indices` should be 1-D.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.

    Examples
    --------
    >>> import pandas as pd

    With the default ``allow_fill=False``, negative numbers indicate
    positional indices from the right.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1])
    array([10, 10, 30])

    Setting ``allow_fill=True`` will place `fill_value` in those positions.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True)
    array([10., 10., nan])

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True,
    ...      fill_value=-10)
    array([ 10,  10, -10])
    """
    supported_types = (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries)
    if not isinstance(arr, supported_types):
        # GH#52981
        warnings.warn(
            "pd.api.extensions.take accepting non-standard inputs is deprecated "
            "and will raise in a future version. Pass either a numpy.ndarray, "
            "ExtensionArray, Index, or Series instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    if not is_array_like(arr):
        arr = np.asarray(arr)

    indexer = ensure_platform_int(indices)

    if not allow_fill:
        # NumPy style: negative indices count from the end
        return arr.take(indexer, axis=axis)

    # Pandas style, -1 means NA
    validate_indices(indexer, arr.shape[axis])
    return take_nd(arr, indexer, axis=axis, allow_fill=True, fill_value=fill_value)
|
| 1241 |
+
|
| 1242 |
+
|
| 1243 |
+
# ------------ #
|
| 1244 |
+
# searchsorted #
|
| 1245 |
+
# ------------ #
|
| 1246 |
+
|
| 1247 |
+
|
| 1248 |
+
def searchsorted(
    arr: ArrayLike,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
    """
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `arr` such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `arr` would be preserved.

    Assuming that `arr` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``arr[i-1] < value <= arr[i]``
    right   ``arr[i-1] <= value < arr[i]``
    ======  ================================

    Parameters
    ----------
    arr : np.ndarray, ExtensionArray, Series
        Input array. If `sorter` is None, then it must be sorted in
        ascending order, otherwise `sorter` must be an array of indices
        that sort it.
    value : array-like or scalar
        Values to insert into `arr`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `arr`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort `arr` into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.
    """
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    integer_search = (
        isinstance(arr, np.ndarray)
        and arr.dtype.kind in "iu"
        and (is_integer(value) or is_integer_dtype(value))
    )
    if not integer_search:
        # E.g. if `arr` is an array with dtype='datetime64[ns]'
        # and `value` is a pd.Timestamp, we may need to convert value
        arr = ensure_wrapped_if_datetimelike(arr)
        return arr.searchsorted(  # type: ignore[arg-type]
            value, side=side, sorter=sorter
        )

    # If `arr` and `value` have different dtypes, numpy would recast `arr`,
    # which makes the search slow.  Give `value` the dtype of `arr` whenever
    # that cannot overflow; otherwise keep the dtype numpy inferred for it.
    scalar_value = is_integer(value)
    bounds = np.iinfo(arr.dtype.type)
    probe = np.array([value]) if scalar_value else np.array(value)
    fits = (probe >= bounds.min).all() and (probe <= bounds.max).all()
    target_dtype = arr.dtype if fits else probe.dtype

    if scalar_value:
        # We know that value is int
        value = cast(int, target_dtype.type(value))
    else:
        value = pd_array(cast(ArrayLike, value), dtype=target_dtype)

    # Argument 1 to "searchsorted" of "ndarray" has incompatible type
    # "Union[NumpyValueArrayLike, ExtensionArray]"; expected "NumpyValueArrayLike"
    return arr.searchsorted(value, side=side, sorter=sorter)  # type: ignore[arg-type]
|
| 1330 |
+
|
| 1331 |
+
|
| 1332 |
+
# ---- #
|
| 1333 |
+
# diff #
|
| 1334 |
+
# ---- #
|
| 1335 |
+
|
| 1336 |
+
_diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"}
|
| 1337 |
+
|
| 1338 |
+
|
| 1339 |
+
def diff(arr, n: int, axis: AxisInt = 0):
|
| 1340 |
+
"""
|
| 1341 |
+
difference of n between self,
|
| 1342 |
+
analogous to s-s.shift(n)
|
| 1343 |
+
|
| 1344 |
+
Parameters
|
| 1345 |
+
----------
|
| 1346 |
+
arr : ndarray or ExtensionArray
|
| 1347 |
+
n : int
|
| 1348 |
+
number of periods
|
| 1349 |
+
axis : {0, 1}
|
| 1350 |
+
axis to shift on
|
| 1351 |
+
stacklevel : int, default 3
|
| 1352 |
+
The stacklevel for the lost dtype warning.
|
| 1353 |
+
|
| 1354 |
+
Returns
|
| 1355 |
+
-------
|
| 1356 |
+
shifted
|
| 1357 |
+
"""
|
| 1358 |
+
|
| 1359 |
+
n = int(n)
|
| 1360 |
+
na = np.nan
|
| 1361 |
+
dtype = arr.dtype
|
| 1362 |
+
|
| 1363 |
+
is_bool = is_bool_dtype(dtype)
|
| 1364 |
+
if is_bool:
|
| 1365 |
+
op = operator.xor
|
| 1366 |
+
else:
|
| 1367 |
+
op = operator.sub
|
| 1368 |
+
|
| 1369 |
+
if isinstance(dtype, NumpyEADtype):
|
| 1370 |
+
# NumpyExtensionArray cannot necessarily hold shifted versions of itself.
|
| 1371 |
+
arr = arr.to_numpy()
|
| 1372 |
+
dtype = arr.dtype
|
| 1373 |
+
|
| 1374 |
+
if not isinstance(arr, np.ndarray):
|
| 1375 |
+
# i.e ExtensionArray
|
| 1376 |
+
if hasattr(arr, f"__{op.__name__}__"):
|
| 1377 |
+
if axis != 0:
|
| 1378 |
+
raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
|
| 1379 |
+
return op(arr, arr.shift(n))
|
| 1380 |
+
else:
|
| 1381 |
+
raise TypeError(
|
| 1382 |
+
f"{type(arr).__name__} has no 'diff' method. "
|
| 1383 |
+
"Convert to a suitable dtype prior to calling 'diff'."
|
| 1384 |
+
)
|
| 1385 |
+
|
| 1386 |
+
is_timedelta = False
|
| 1387 |
+
if arr.dtype.kind in "mM":
|
| 1388 |
+
dtype = np.int64
|
| 1389 |
+
arr = arr.view("i8")
|
| 1390 |
+
na = iNaT
|
| 1391 |
+
is_timedelta = True
|
| 1392 |
+
|
| 1393 |
+
elif is_bool:
|
| 1394 |
+
# We have to cast in order to be able to hold np.nan
|
| 1395 |
+
dtype = np.object_
|
| 1396 |
+
|
| 1397 |
+
elif dtype.kind in "iu":
|
| 1398 |
+
# We have to cast in order to be able to hold np.nan
|
| 1399 |
+
|
| 1400 |
+
# int8, int16 are incompatible with float64,
|
| 1401 |
+
# see https://github.com/cython/cython/issues/2646
|
| 1402 |
+
if arr.dtype.name in ["int8", "int16"]:
|
| 1403 |
+
dtype = np.float32
|
| 1404 |
+
else:
|
| 1405 |
+
dtype = np.float64
|
| 1406 |
+
|
| 1407 |
+
orig_ndim = arr.ndim
|
| 1408 |
+
if orig_ndim == 1:
|
| 1409 |
+
# reshape so we can always use algos.diff_2d
|
| 1410 |
+
arr = arr.reshape(-1, 1)
|
| 1411 |
+
# TODO: require axis == 0
|
| 1412 |
+
|
| 1413 |
+
dtype = np.dtype(dtype)
|
| 1414 |
+
out_arr = np.empty(arr.shape, dtype=dtype)
|
| 1415 |
+
|
| 1416 |
+
na_indexer = [slice(None)] * 2
|
| 1417 |
+
na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None)
|
| 1418 |
+
out_arr[tuple(na_indexer)] = na
|
| 1419 |
+
|
| 1420 |
+
if arr.dtype.name in _diff_special:
|
| 1421 |
+
# TODO: can diff_2d dtype specialization troubles be fixed by defining
|
| 1422 |
+
# out_arr inside diff_2d?
|
| 1423 |
+
algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
|
| 1424 |
+
else:
|
| 1425 |
+
# To keep mypy happy, _res_indexer is a list while res_indexer is
|
| 1426 |
+
# a tuple, ditto for lag_indexer.
|
| 1427 |
+
_res_indexer = [slice(None)] * 2
|
| 1428 |
+
_res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
|
| 1429 |
+
res_indexer = tuple(_res_indexer)
|
| 1430 |
+
|
| 1431 |
+
_lag_indexer = [slice(None)] * 2
|
| 1432 |
+
_lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
|
| 1433 |
+
lag_indexer = tuple(_lag_indexer)
|
| 1434 |
+
|
| 1435 |
+
out_arr[res_indexer] = op(arr[res_indexer], arr[lag_indexer])
|
| 1436 |
+
|
| 1437 |
+
if is_timedelta:
|
| 1438 |
+
out_arr = out_arr.view("timedelta64[ns]")
|
| 1439 |
+
|
| 1440 |
+
if orig_ndim == 1:
|
| 1441 |
+
out_arr = out_arr[:, 0]
|
| 1442 |
+
return out_arr
|
| 1443 |
+
|
| 1444 |
+
|
| 1445 |
+
# --------------------------------------------------------------------
|
| 1446 |
+
# Helper functions
|
| 1447 |
+
|
| 1448 |
+
|
| 1449 |
+
# Note: safe_sort is in algorithms.py instead of sorting.py because it is
|
| 1450 |
+
# low-dependency, is used in this module, and uses private methods from
|
| 1451 |
+
# this module.
|
| 1452 |
+
def safe_sort(
    values: Index | ArrayLike,
    codes: npt.NDArray[np.intp] | None = None,
    use_na_sentinel: bool = True,
    assume_unique: bool = False,
    verify: bool = True,
) -> AnyArrayLike | tuple[AnyArrayLike, np.ndarray]:
    """
    Sort ``values`` and reorder corresponding ``codes``.

    ``values`` should be unique if ``codes`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``codes`` is not None.
    codes : np.ndarray[intp] or None, default None
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``-1``. The array passed in is
        never modified.
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``codes`` is None.
    verify : bool, default True
        Check if codes are out of bound for the values and put out of bound
        codes equal to ``-1``. If ``verify=False``, it is assumed there
        are no out of bound codes. Ignored when ``codes`` is None.

    Returns
    -------
    ordered : AnyArrayLike
        Sorted ``values``
    new_codes : ndarray
        Reordered ``codes``; returned when ``codes`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``codes`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``codes`` is not None and ``values`` contain duplicates.
    """
    if not isinstance(values, (np.ndarray, ABCExtensionArray, ABCIndex)):
        raise TypeError(
            "Only np.ndarray, ExtensionArray, and Index objects are allowed to "
            "be passed to safe_sort as values"
        )

    sorter = None
    ordered: AnyArrayLike

    if (
        not isinstance(values.dtype, ExtensionDtype)
        and lib.infer_dtype(values, skipna=False) == "mixed-integer"
    ):
        ordered = _sort_mixed(values)
    else:
        try:
            sorter = values.argsort()
            ordered = values.take(sorter)
        except (TypeError, decimal.InvalidOperation):
            # Previous sorters failed or were not applicable, try `_sort_mixed`
            # which would work, but which fails for special case of 1d arrays
            # with tuples.
            if values.size and isinstance(values[0], tuple):
                # error: Argument 1 to "_sort_tuples" has incompatible type
                # "Union[Index, ExtensionArray, ndarray[Any, Any]]"; expected
                # "ndarray[Any, Any]"
                ordered = _sort_tuples(values)  # type: ignore[arg-type]
            else:
                ordered = _sort_mixed(values)

    # codes:

    if codes is None:
        return ordered

    if not is_list_like(codes):
        raise TypeError(
            "Only list-like objects or None are allowed to "
            "be passed to safe_sort as codes"
        )
    codes = ensure_platform_int(np.asarray(codes))

    if not assume_unique and len(unique(values)) != len(values):
        raise ValueError("values should be unique if codes is not None")

    if sorter is None:
        # mixed types
        # error: Argument 1 to "_get_hashtable_algo" has incompatible type
        # "Union[Index, ExtensionArray, ndarray[Any, Any]]"; expected
        # "ndarray[Any, Any]"
        hash_klass, values = _get_hashtable_algo(values)  # type: ignore[arg-type]
        t = hash_klass(len(values))
        t.map_locations(values)
        sorter = ensure_platform_int(t.lookup(ordered))

    if use_na_sentinel:
        # take_nd is faster, but only works for na_sentinels of -1
        order2 = sorter.argsort()
        mask = None
        if verify:
            mask = (codes < -len(values)) | (codes >= len(values))
            # `codes` may still be the caller's own array (np.asarray and
            # ensure_platform_int do not necessarily copy), so zero the
            # out-of-bound entries on a private copy.
            codes = codes.copy()
            codes[mask] = 0
        new_codes = take_nd(order2, codes, fill_value=-1)
        if mask is not None:
            # out-of-bound codes were temporarily pointed at 0; mark them missing
            np.putmask(new_codes, mask, -1)
    else:
        reverse_indexer = np.empty(len(sorter), dtype=int)
        reverse_indexer.put(sorter, np.arange(len(sorter)))
        # Without a sentinel nothing is masked afterwards; out of bound
        # indices are wrapped around by `mode='wrap'`.
        new_codes = reverse_indexer.take(codes, mode="wrap")

    return ordered, ensure_platform_int(new_codes)
|
| 1580 |
+
|
| 1581 |
+
|
| 1582 |
+
def _sort_mixed(values) -> AnyArrayLike:
|
| 1583 |
+
"""order ints before strings before nulls in 1d arrays"""
|
| 1584 |
+
str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
|
| 1585 |
+
null_pos = np.array([isna(x) for x in values], dtype=bool)
|
| 1586 |
+
num_pos = ~str_pos & ~null_pos
|
| 1587 |
+
str_argsort = np.argsort(values[str_pos])
|
| 1588 |
+
num_argsort = np.argsort(values[num_pos])
|
| 1589 |
+
# convert boolean arrays to positional indices, then order by underlying values
|
| 1590 |
+
str_locs = str_pos.nonzero()[0].take(str_argsort)
|
| 1591 |
+
num_locs = num_pos.nonzero()[0].take(num_argsort)
|
| 1592 |
+
null_locs = null_pos.nonzero()[0]
|
| 1593 |
+
locs = np.concatenate([num_locs, str_locs, null_locs])
|
| 1594 |
+
return values.take(locs)
|
| 1595 |
+
|
| 1596 |
+
|
| 1597 |
+
def _sort_tuples(values: np.ndarray) -> np.ndarray:
|
| 1598 |
+
"""
|
| 1599 |
+
Convert array of tuples (1d) to array of arrays (2d).
|
| 1600 |
+
We need to keep the columns separately as they contain different types and
|
| 1601 |
+
nans (can't use `np.sort` as it may fail when str and nan are mixed in a
|
| 1602 |
+
column as types cannot be compared).
|
| 1603 |
+
"""
|
| 1604 |
+
from pandas.core.internals.construction import to_arrays
|
| 1605 |
+
from pandas.core.sorting import lexsort_indexer
|
| 1606 |
+
|
| 1607 |
+
arrays, _ = to_arrays(values, None)
|
| 1608 |
+
indexer = lexsort_indexer(arrays, orders=True)
|
| 1609 |
+
return values[indexer]
|
| 1610 |
+
|
| 1611 |
+
|
| 1612 |
+
def union_with_duplicates(
|
| 1613 |
+
lvals: ArrayLike | Index, rvals: ArrayLike | Index
|
| 1614 |
+
) -> ArrayLike | Index:
|
| 1615 |
+
"""
|
| 1616 |
+
Extracts the union from lvals and rvals with respect to duplicates and nans in
|
| 1617 |
+
both arrays.
|
| 1618 |
+
|
| 1619 |
+
Parameters
|
| 1620 |
+
----------
|
| 1621 |
+
lvals: np.ndarray or ExtensionArray
|
| 1622 |
+
left values which is ordered in front.
|
| 1623 |
+
rvals: np.ndarray or ExtensionArray
|
| 1624 |
+
right values ordered after lvals.
|
| 1625 |
+
|
| 1626 |
+
Returns
|
| 1627 |
+
-------
|
| 1628 |
+
np.ndarray or ExtensionArray
|
| 1629 |
+
Containing the unsorted union of both arrays.
|
| 1630 |
+
|
| 1631 |
+
Notes
|
| 1632 |
+
-----
|
| 1633 |
+
Caller is responsible for ensuring lvals.dtype == rvals.dtype.
|
| 1634 |
+
"""
|
| 1635 |
+
from pandas import Series
|
| 1636 |
+
|
| 1637 |
+
with warnings.catch_warnings():
|
| 1638 |
+
# filter warning from object dtype inference; we will end up discarding
|
| 1639 |
+
# the index here, so the deprecation does not affect the end result here.
|
| 1640 |
+
warnings.filterwarnings(
|
| 1641 |
+
"ignore",
|
| 1642 |
+
"The behavior of value_counts with object-dtype is deprecated",
|
| 1643 |
+
category=FutureWarning,
|
| 1644 |
+
)
|
| 1645 |
+
l_count = value_counts_internal(lvals, dropna=False)
|
| 1646 |
+
r_count = value_counts_internal(rvals, dropna=False)
|
| 1647 |
+
l_count, r_count = l_count.align(r_count, fill_value=0)
|
| 1648 |
+
final_count = np.maximum(l_count.values, r_count.values)
|
| 1649 |
+
final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)
|
| 1650 |
+
if isinstance(lvals, ABCMultiIndex) and isinstance(rvals, ABCMultiIndex):
|
| 1651 |
+
unique_vals = lvals.append(rvals).unique()
|
| 1652 |
+
else:
|
| 1653 |
+
if isinstance(lvals, ABCIndex):
|
| 1654 |
+
lvals = lvals._values
|
| 1655 |
+
if isinstance(rvals, ABCIndex):
|
| 1656 |
+
rvals = rvals._values
|
| 1657 |
+
# error: List item 0 has incompatible type "Union[ExtensionArray,
|
| 1658 |
+
# ndarray[Any, Any], Index]"; expected "Union[ExtensionArray,
|
| 1659 |
+
# ndarray[Any, Any]]"
|
| 1660 |
+
combined = concat_compat([lvals, rvals]) # type: ignore[list-item]
|
| 1661 |
+
unique_vals = unique(combined)
|
| 1662 |
+
unique_vals = ensure_wrapped_if_datetimelike(unique_vals)
|
| 1663 |
+
repeats = final_count.reindex(unique_vals).values
|
| 1664 |
+
return np.repeat(unique_vals, repeats)
|
| 1665 |
+
|
| 1666 |
+
|
| 1667 |
+
def map_array(
    arr: ArrayLike,
    mapper,
    na_action: Literal["ignore"] | None = None,
    convert: bool = True,
) -> np.ndarray | ExtensionArray | Index:
    """
    Map values using an input mapping or function.

    Parameters
    ----------
    arr : ndarray or ExtensionArray
        Values to map.
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence.
    convert : bool, default True
        Try to find better dtype for elementwise function results. If
        False, leave as dtype=object.

    Returns
    -------
    Union[ndarray, Index, ExtensionArray]
        The output of the mapping function applied to the array.
        If the function returns a tuple with more than one element
        a MultiIndex will be returned.

    Raises
    ------
    ValueError
        If ``na_action`` is neither None nor 'ignore'.
    """
    if na_action not in (None, "ignore"):
        msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
        raise ValueError(msg)

    # we can fastpath dict/Series to an efficient map
    # as we know that we are not going to have to yield
    # python types
    if is_dict_like(mapper):
        if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
            # If a dictionary subclass defines a default value method,
            # convert mapper to a lookup function (GH #15999).
            dict_with_default = mapper

            def _lookup_with_default(key):
                # any float nan is looked up under the canonical np.nan key
                if isinstance(key, float) and np.isnan(key):
                    key = np.nan
                return dict_with_default[key]

            mapper = _lookup_with_default
        else:
            # Dictionary does not have a default. Thus it's safe to
            # convert to an Series for efficiency.
            # we specify the keys here to handle the
            # possibility that they are tuples

            # The return value of mapping with an empty mapper is
            # expected to be pd.Series(np.nan, ...). As np.nan is
            # of dtype float64 the return value of this method should
            # be float64 as well
            from pandas import Series

            series_kwargs = {"dtype": np.float64} if len(mapper) == 0 else {}
            mapper = Series(mapper, **series_kwargs)

    if isinstance(mapper, ABCSeries):
        if na_action == "ignore":
            mapper = mapper[mapper.index.notna()]

        # Since values were input this means we came from either
        # a dict or a series and mapper should be an index
        positions = mapper.index.get_indexer(arr)
        return take_nd(mapper._values, positions)

    if not len(arr):
        return arr.copy()

    # we must convert to python types
    values = arr.astype(object, copy=False)
    if na_action is not None:
        return lib.map_infer_mask(
            values, mapper, mask=isna(values).view(np.uint8), convert=convert
        )
    return lib.map_infer(values, mapper, convert=convert)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/flags.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import TYPE_CHECKING
|
| 4 |
+
import weakref
|
| 5 |
+
|
| 6 |
+
if TYPE_CHECKING:
|
| 7 |
+
from pandas.core.generic import NDFrame
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Flags:
    """
    Flags that apply to pandas objects.

    Parameters
    ----------
    obj : Series or DataFrame
        The object these flags are associated with.
    allows_duplicate_labels : bool, default True
        Whether to allow duplicate labels in this object. By default,
        duplicate labels are permitted. Setting this to ``False`` will
        cause an :class:`errors.DuplicateLabelError` to be raised when
        `index` (or columns for DataFrame) is not unique, or any
        subsequent operation on it introduces duplicates.
        See :ref:`duplicates.disallow` for more.

        .. warning::

           This is an experimental feature. Currently, many methods fail to
           propagate the ``allows_duplicate_labels`` value. In future versions
           it is expected that every method taking or returning one or more
           DataFrame or Series objects will propagate ``allows_duplicate_labels``.

    Examples
    --------
    Attributes can be set in two ways:

    >>> df = pd.DataFrame()
    >>> df.flags
    <Flags(allows_duplicate_labels=True)>
    >>> df.flags.allows_duplicate_labels = False
    >>> df.flags
    <Flags(allows_duplicate_labels=False)>

    >>> df.flags['allows_duplicate_labels'] = True
    >>> df.flags
    <Flags(allows_duplicate_labels=True)>
    """

    # names that may be used with item access (``flags[name]``)
    _keys: set[str] = {"allows_duplicate_labels"}

    def __init__(self, obj: NDFrame, *, allows_duplicate_labels: bool) -> None:
        self._allows_duplicate_labels = allows_duplicate_labels
        # weak reference: the flags must not keep their owner alive
        self._obj = weakref.ref(obj)

    @property
    def allows_duplicate_labels(self) -> bool:
        """
        Whether this object allows duplicate labels.

        Setting ``allows_duplicate_labels=False`` ensures that the
        index (and columns of a DataFrame) are unique. Most methods
        that accept and return a Series or DataFrame will propagate
        the value of ``allows_duplicate_labels``.

        See :ref:`duplicates` for more.

        See Also
        --------
        DataFrame.attrs : Set global metadata on this object.
        DataFrame.set_flags : Set global flags on this object.

        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2]}, index=['a', 'a'])
        >>> df.flags.allows_duplicate_labels
        True
        >>> df.flags.allows_duplicate_labels = False
        Traceback (most recent call last):
            ...
        pandas.errors.DuplicateLabelError: Index has duplicates.
              positions
        label
        a        [0, 1]
        """
        return self._allows_duplicate_labels

    @allows_duplicate_labels.setter
    def allows_duplicate_labels(self, value: bool) -> None:
        value = bool(value)
        owner = self._obj()
        if owner is None:
            raise ValueError("This flag's object has been deleted.")

        if not value:
            # disallowing duplicates: check every axis before storing the flag
            for axis in owner.axes:
                axis._maybe_check_unique()

        self._allows_duplicate_labels = value

    def __getitem__(self, key: str):
        if key in self._keys:
            return getattr(self, key)
        raise KeyError(key)

    def __setitem__(self, key: str, value) -> None:
        if key not in self._keys:
            raise ValueError(f"Unknown flag {key}. Must be one of {self._keys}")
        setattr(self, key, value)

    def __repr__(self) -> str:
        return f"<Flags(allows_duplicate_labels={self.allows_duplicate_labels})>"

    def __eq__(self, other) -> bool:
        return (
            isinstance(other, type(self))
            and self.allows_duplicate_labels == other.allows_duplicate_labels
        )
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/roperator.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Reversed Operations not available in the stdlib operator module.
|
| 3 |
+
Defining these instead of using lambdas allows us to reference them by name.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import operator
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def radd(left, right):
    """Reversed addition: return ``right + left``."""
    return operator.add(right, left)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def rsub(left, right):
    """Reversed subtraction: return ``right - left``."""
    return operator.sub(right, left)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def rmul(left, right):
    """Reversed multiplication: return ``right * left``."""
    return operator.mul(right, left)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def rdiv(left, right):
    """Reversed true division: return ``right / left``."""
    return operator.truediv(right, left)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def rtruediv(left, right):
    """Reversed true division: return ``right / left``."""
    return operator.truediv(right, left)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def rfloordiv(left, right):
    """Reversed floor division: return ``right // left``."""
    return operator.floordiv(right, left)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def rmod(left, right):
    """
    Reversed modulo: return ``right % left``.

    Raises
    ------
    TypeError
        If ``right`` is a string, because ``%`` would then be string
        formatting rather than a modulo operation.
    """
    if not isinstance(right, str):
        return operator.mod(right, left)

    # `%` on a str is the formatting operator, which is not what is meant here
    typ = type(left).__name__
    raise TypeError(f"{typ} cannot perform the operation mod")
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def rdivmod(left, right):
    """Reversed divmod: return ``divmod(right, left)``."""
    quotient_and_remainder = divmod(right, left)
    return quotient_and_remainder
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def rpow(left, right):
    """Reversed exponentiation: return ``right ** left``."""
    return operator.pow(right, left)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def rand_(left, right):
    """Reversed and: return ``right & left``."""
    return right & left
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def ror_(left, right):
    """Reversed or: return ``right | left``."""
    return right | left
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def rxor(left, right):
    """Reversed exclusive or: return ``right ^ left``."""
    return right ^ left
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__init__.py
ADDED
|
File without changes
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_aggregation.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.core.apply import (
|
| 5 |
+
_make_unique_kwarg_list,
|
| 6 |
+
maybe_mangle_lambdas,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def test_maybe_mangle_lambdas_passthrough():
    """A string, a bare lambda, and a single-lambda list keep their names."""
    assert maybe_mangle_lambdas("mean") == "mean"

    bare = maybe_mangle_lambdas(lambda x: x)
    assert bare.__name__ == "<lambda>"

    # don't mangle single lambda.
    singleton = maybe_mangle_lambdas([lambda x: x])
    assert singleton[0].__name__ == "<lambda>"
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def test_maybe_mangle_lambdas_listlike():
    """Several lambdas in a list get numbered names and keep their behavior."""
    aggfuncs = [lambda x: 1, lambda x: 2]
    result = maybe_mangle_lambdas(aggfuncs)

    for position, mangled in enumerate(result):
        assert mangled.__name__ == f"<lambda_{position}>"

    for original, mangled in zip(aggfuncs, result):
        assert original(None) == mangled(None)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def test_maybe_mangle_lambdas():
    """Lambdas listed under a dict key get numbered names."""
    func = {"A": [lambda x: 0, lambda x: 1]}
    mangled = maybe_mangle_lambdas(func)["A"]

    first, second = mangled[0], mangled[1]
    assert first.__name__ == "<lambda_0>"
    assert second.__name__ == "<lambda_1>"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def test_maybe_mangle_lambdas_args():
    """Lambdas that take extra arguments are renamed; the originals still work."""
    func = {"A": [lambda x, a, b=1: (0, a, b), lambda x: 1]}
    result = maybe_mangle_lambdas(func)
    assert result["A"][0].__name__ == "<lambda_0>"
    assert result["A"][1].__name__ == "<lambda_1>"

    # the original lambda still honors positional and keyword arguments
    with_args = func["A"][0]
    assert with_args(0, 1) == (0, 1, 1)
    assert with_args(0, 1, 2) == (0, 1, 2)
    assert with_args(0, 2, b=3) == (0, 2, 3)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def test_maybe_mangle_lambdas_named():
    """A spec made only of named functions compares equal to the input."""
    func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}}
    assert maybe_mangle_lambdas(func) == func
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@pytest.mark.parametrize(
    "order, expected_reorder",
    [
        (
            [
                ("height", "<lambda>"),
                ("height", "max"),
                ("weight", "max"),
                ("height", "<lambda>"),
                ("weight", "<lambda>"),
            ],
            [
                ("height", "<lambda>_0"),
                ("height", "max"),
                ("weight", "max"),
                ("height", "<lambda>_1"),
                ("weight", "<lambda>"),
            ],
        ),
        (
            [
                ("col2", "min"),
                ("col1", "<lambda>"),
                ("col1", "<lambda>"),
                ("col1", "<lambda>"),
            ],
            [
                ("col2", "min"),
                ("col1", "<lambda>_0"),
                ("col1", "<lambda>_1"),
                ("col1", "<lambda>_2"),
            ],
        ),
        (
            [
                ("col", "<lambda>"),
                ("col", "<lambda>"),
                ("col", "<lambda>"),
            ],
            [
                ("col", "<lambda>_0"),
                ("col", "<lambda>_1"),
                ("col", "<lambda>_2"),
            ],
        ),
    ],
)
def test_make_unique(order, expected_reorder):
    """Repeated (column, aggfunc) pairs get a numeric suffix; unique pairs do not."""
    # GH 27519, test if make_unique function reorders correctly
    assert _make_unique_kwarg_list(order) == expected_reorder
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_algos.py
ADDED
|
@@ -0,0 +1,2041 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
import struct
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
from pandas._libs import (
|
| 8 |
+
algos as libalgos,
|
| 9 |
+
hashtable as ht,
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
from pandas.core.dtypes.common import (
|
| 13 |
+
is_bool_dtype,
|
| 14 |
+
is_complex_dtype,
|
| 15 |
+
is_float_dtype,
|
| 16 |
+
is_integer_dtype,
|
| 17 |
+
is_object_dtype,
|
| 18 |
+
)
|
| 19 |
+
from pandas.core.dtypes.dtypes import CategoricalDtype
|
| 20 |
+
|
| 21 |
+
import pandas as pd
|
| 22 |
+
from pandas import (
|
| 23 |
+
Categorical,
|
| 24 |
+
CategoricalIndex,
|
| 25 |
+
DataFrame,
|
| 26 |
+
DatetimeIndex,
|
| 27 |
+
Index,
|
| 28 |
+
IntervalIndex,
|
| 29 |
+
MultiIndex,
|
| 30 |
+
NaT,
|
| 31 |
+
Period,
|
| 32 |
+
PeriodIndex,
|
| 33 |
+
Series,
|
| 34 |
+
Timedelta,
|
| 35 |
+
Timestamp,
|
| 36 |
+
cut,
|
| 37 |
+
date_range,
|
| 38 |
+
timedelta_range,
|
| 39 |
+
to_datetime,
|
| 40 |
+
to_timedelta,
|
| 41 |
+
)
|
| 42 |
+
import pandas._testing as tm
|
| 43 |
+
import pandas.core.algorithms as algos
|
| 44 |
+
from pandas.core.arrays import (
|
| 45 |
+
DatetimeArray,
|
| 46 |
+
TimedeltaArray,
|
| 47 |
+
)
|
| 48 |
+
import pandas.core.common as com
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class TestFactorize:
|
| 52 |
+
def test_factorize_complex(self):
|
| 53 |
+
# GH#17927
|
| 54 |
+
array = [1, 2, 2 + 1j]
|
| 55 |
+
msg = "factorize with argument that is not not a Series"
|
| 56 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 57 |
+
labels, uniques = algos.factorize(array)
|
| 58 |
+
|
| 59 |
+
expected_labels = np.array([0, 1, 2], dtype=np.intp)
|
| 60 |
+
tm.assert_numpy_array_equal(labels, expected_labels)
|
| 61 |
+
|
| 62 |
+
# Should return a complex dtype in the future
|
| 63 |
+
expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object)
|
| 64 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 65 |
+
|
| 66 |
+
@pytest.mark.parametrize("sort", [True, False])
|
| 67 |
+
def test_factorize(self, index_or_series_obj, sort):
|
| 68 |
+
obj = index_or_series_obj
|
| 69 |
+
result_codes, result_uniques = obj.factorize(sort=sort)
|
| 70 |
+
|
| 71 |
+
constructor = Index
|
| 72 |
+
if isinstance(obj, MultiIndex):
|
| 73 |
+
constructor = MultiIndex.from_tuples
|
| 74 |
+
expected_arr = obj.unique()
|
| 75 |
+
if expected_arr.dtype == np.float16:
|
| 76 |
+
expected_arr = expected_arr.astype(np.float32)
|
| 77 |
+
expected_uniques = constructor(expected_arr)
|
| 78 |
+
if (
|
| 79 |
+
isinstance(obj, Index)
|
| 80 |
+
and expected_uniques.dtype == bool
|
| 81 |
+
and obj.dtype == object
|
| 82 |
+
):
|
| 83 |
+
expected_uniques = expected_uniques.astype(object)
|
| 84 |
+
|
| 85 |
+
if sort:
|
| 86 |
+
expected_uniques = expected_uniques.sort_values()
|
| 87 |
+
|
| 88 |
+
# construct an integer ndarray so that
|
| 89 |
+
# `expected_uniques.take(expected_codes)` is equal to `obj`
|
| 90 |
+
expected_uniques_list = list(expected_uniques)
|
| 91 |
+
expected_codes = [expected_uniques_list.index(val) for val in obj]
|
| 92 |
+
expected_codes = np.asarray(expected_codes, dtype=np.intp)
|
| 93 |
+
|
| 94 |
+
tm.assert_numpy_array_equal(result_codes, expected_codes)
|
| 95 |
+
tm.assert_index_equal(result_uniques, expected_uniques, exact=True)
|
| 96 |
+
|
| 97 |
+
def test_series_factorize_use_na_sentinel_false(self):
|
| 98 |
+
# GH#35667
|
| 99 |
+
values = np.array([1, 2, 1, np.nan])
|
| 100 |
+
ser = Series(values)
|
| 101 |
+
codes, uniques = ser.factorize(use_na_sentinel=False)
|
| 102 |
+
|
| 103 |
+
expected_codes = np.array([0, 1, 0, 2], dtype=np.intp)
|
| 104 |
+
expected_uniques = Index([1.0, 2.0, np.nan])
|
| 105 |
+
|
| 106 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 107 |
+
tm.assert_index_equal(uniques, expected_uniques)
|
| 108 |
+
|
| 109 |
+
def test_basic(self):
|
| 110 |
+
items = np.array(["a", "b", "b", "a", "a", "c", "c", "c"], dtype=object)
|
| 111 |
+
codes, uniques = algos.factorize(items)
|
| 112 |
+
tm.assert_numpy_array_equal(uniques, np.array(["a", "b", "c"], dtype=object))
|
| 113 |
+
|
| 114 |
+
codes, uniques = algos.factorize(items, sort=True)
|
| 115 |
+
exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp)
|
| 116 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 117 |
+
exp = np.array(["a", "b", "c"], dtype=object)
|
| 118 |
+
tm.assert_numpy_array_equal(uniques, exp)
|
| 119 |
+
|
| 120 |
+
arr = np.arange(5, dtype=np.intp)[::-1]
|
| 121 |
+
|
| 122 |
+
codes, uniques = algos.factorize(arr)
|
| 123 |
+
exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
|
| 124 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 125 |
+
exp = np.array([4, 3, 2, 1, 0], dtype=arr.dtype)
|
| 126 |
+
tm.assert_numpy_array_equal(uniques, exp)
|
| 127 |
+
|
| 128 |
+
codes, uniques = algos.factorize(arr, sort=True)
|
| 129 |
+
exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
|
| 130 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 131 |
+
exp = np.array([0, 1, 2, 3, 4], dtype=arr.dtype)
|
| 132 |
+
tm.assert_numpy_array_equal(uniques, exp)
|
| 133 |
+
|
| 134 |
+
arr = np.arange(5.0)[::-1]
|
| 135 |
+
|
| 136 |
+
codes, uniques = algos.factorize(arr)
|
| 137 |
+
exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
|
| 138 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 139 |
+
exp = np.array([4.0, 3.0, 2.0, 1.0, 0.0], dtype=arr.dtype)
|
| 140 |
+
tm.assert_numpy_array_equal(uniques, exp)
|
| 141 |
+
|
| 142 |
+
codes, uniques = algos.factorize(arr, sort=True)
|
| 143 |
+
exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
|
| 144 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 145 |
+
exp = np.array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=arr.dtype)
|
| 146 |
+
tm.assert_numpy_array_equal(uniques, exp)
|
| 147 |
+
|
| 148 |
+
def test_mixed(self):
|
| 149 |
+
# doc example reshaping.rst
|
| 150 |
+
x = Series(["A", "A", np.nan, "B", 3.14, np.inf])
|
| 151 |
+
codes, uniques = algos.factorize(x)
|
| 152 |
+
|
| 153 |
+
exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
|
| 154 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 155 |
+
exp = Index(["A", "B", 3.14, np.inf])
|
| 156 |
+
tm.assert_index_equal(uniques, exp)
|
| 157 |
+
|
| 158 |
+
codes, uniques = algos.factorize(x, sort=True)
|
| 159 |
+
exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
|
| 160 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 161 |
+
exp = Index([3.14, np.inf, "A", "B"])
|
| 162 |
+
tm.assert_index_equal(uniques, exp)
|
| 163 |
+
|
| 164 |
+
def test_factorize_datetime64(self):
|
| 165 |
+
# M8
|
| 166 |
+
v1 = Timestamp("20130101 09:00:00.00004")
|
| 167 |
+
v2 = Timestamp("20130101")
|
| 168 |
+
x = Series([v1, v1, v1, v2, v2, v1])
|
| 169 |
+
codes, uniques = algos.factorize(x)
|
| 170 |
+
|
| 171 |
+
exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
|
| 172 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 173 |
+
exp = DatetimeIndex([v1, v2])
|
| 174 |
+
tm.assert_index_equal(uniques, exp)
|
| 175 |
+
|
| 176 |
+
codes, uniques = algos.factorize(x, sort=True)
|
| 177 |
+
exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.intp)
|
| 178 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 179 |
+
exp = DatetimeIndex([v2, v1])
|
| 180 |
+
tm.assert_index_equal(uniques, exp)
|
| 181 |
+
|
| 182 |
+
def test_factorize_period(self):
|
| 183 |
+
# period
|
| 184 |
+
v1 = Period("201302", freq="M")
|
| 185 |
+
v2 = Period("201303", freq="M")
|
| 186 |
+
x = Series([v1, v1, v1, v2, v2, v1])
|
| 187 |
+
|
| 188 |
+
# periods are not 'sorted' as they are converted back into an index
|
| 189 |
+
codes, uniques = algos.factorize(x)
|
| 190 |
+
exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
|
| 191 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 192 |
+
tm.assert_index_equal(uniques, PeriodIndex([v1, v2]))
|
| 193 |
+
|
| 194 |
+
codes, uniques = algos.factorize(x, sort=True)
|
| 195 |
+
exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
|
| 196 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 197 |
+
tm.assert_index_equal(uniques, PeriodIndex([v1, v2]))
|
| 198 |
+
|
| 199 |
+
def test_factorize_timedelta(self):
|
| 200 |
+
# GH 5986
|
| 201 |
+
v1 = to_timedelta("1 day 1 min")
|
| 202 |
+
v2 = to_timedelta("1 day")
|
| 203 |
+
x = Series([v1, v2, v1, v1, v2, v2, v1])
|
| 204 |
+
codes, uniques = algos.factorize(x)
|
| 205 |
+
exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.intp)
|
| 206 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 207 |
+
tm.assert_index_equal(uniques, to_timedelta([v1, v2]))
|
| 208 |
+
|
| 209 |
+
codes, uniques = algos.factorize(x, sort=True)
|
| 210 |
+
exp = np.array([1, 0, 1, 1, 0, 0, 1], dtype=np.intp)
|
| 211 |
+
tm.assert_numpy_array_equal(codes, exp)
|
| 212 |
+
tm.assert_index_equal(uniques, to_timedelta([v2, v1]))
|
| 213 |
+
|
| 214 |
+
def test_factorize_nan(self):
|
| 215 |
+
# nan should map to na_sentinel, not reverse_indexer[na_sentinel]
|
| 216 |
+
# rizer.factorize should not raise an exception if na_sentinel indexes
|
| 217 |
+
# outside of reverse_indexer
|
| 218 |
+
key = np.array([1, 2, 1, np.nan], dtype="O")
|
| 219 |
+
rizer = ht.ObjectFactorizer(len(key))
|
| 220 |
+
for na_sentinel in (-1, 20):
|
| 221 |
+
ids = rizer.factorize(key, na_sentinel=na_sentinel)
|
| 222 |
+
expected = np.array([0, 1, 0, na_sentinel], dtype=np.intp)
|
| 223 |
+
assert len(set(key)) == len(set(expected))
|
| 224 |
+
tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)
|
| 225 |
+
tm.assert_numpy_array_equal(ids, expected)
|
| 226 |
+
|
| 227 |
+
def test_factorizer_with_mask(self):
|
| 228 |
+
# GH#49549
|
| 229 |
+
data = np.array([1, 2, 3, 1, 1, 0], dtype="int64")
|
| 230 |
+
mask = np.array([False, False, False, False, False, True])
|
| 231 |
+
rizer = ht.Int64Factorizer(len(data))
|
| 232 |
+
result = rizer.factorize(data, mask=mask)
|
| 233 |
+
expected = np.array([0, 1, 2, 0, 0, -1], dtype=np.intp)
|
| 234 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 235 |
+
expected_uniques = np.array([1, 2, 3], dtype="int64")
|
| 236 |
+
tm.assert_numpy_array_equal(rizer.uniques.to_array(), expected_uniques)
|
| 237 |
+
|
| 238 |
+
def test_factorizer_object_with_nan(self):
|
| 239 |
+
# GH#49549
|
| 240 |
+
data = np.array([1, 2, 3, 1, np.nan])
|
| 241 |
+
rizer = ht.ObjectFactorizer(len(data))
|
| 242 |
+
result = rizer.factorize(data.astype(object))
|
| 243 |
+
expected = np.array([0, 1, 2, 0, -1], dtype=np.intp)
|
| 244 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 245 |
+
expected_uniques = np.array([1, 2, 3], dtype=object)
|
| 246 |
+
tm.assert_numpy_array_equal(rizer.uniques.to_array(), expected_uniques)
|
| 247 |
+
|
| 248 |
+
@pytest.mark.parametrize(
|
| 249 |
+
"data, expected_codes, expected_uniques",
|
| 250 |
+
[
|
| 251 |
+
(
|
| 252 |
+
[(1, 1), (1, 2), (0, 0), (1, 2), "nonsense"],
|
| 253 |
+
[0, 1, 2, 1, 3],
|
| 254 |
+
[(1, 1), (1, 2), (0, 0), "nonsense"],
|
| 255 |
+
),
|
| 256 |
+
(
|
| 257 |
+
[(1, 1), (1, 2), (0, 0), (1, 2), (1, 2, 3)],
|
| 258 |
+
[0, 1, 2, 1, 3],
|
| 259 |
+
[(1, 1), (1, 2), (0, 0), (1, 2, 3)],
|
| 260 |
+
),
|
| 261 |
+
([(1, 1), (1, 2), (0, 0), (1, 2)], [0, 1, 2, 1], [(1, 1), (1, 2), (0, 0)]),
|
| 262 |
+
],
|
| 263 |
+
)
|
| 264 |
+
def test_factorize_tuple_list(self, data, expected_codes, expected_uniques):
|
| 265 |
+
# GH9454
|
| 266 |
+
msg = "factorize with argument that is not not a Series"
|
| 267 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 268 |
+
codes, uniques = pd.factorize(data)
|
| 269 |
+
|
| 270 |
+
tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp))
|
| 271 |
+
|
| 272 |
+
expected_uniques_array = com.asarray_tuplesafe(expected_uniques, dtype=object)
|
| 273 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques_array)
|
| 274 |
+
|
| 275 |
+
def test_complex_sorting(self):
|
| 276 |
+
# gh 12666 - check no segfault
|
| 277 |
+
x17 = np.array([complex(i) for i in range(17)], dtype=object)
|
| 278 |
+
|
| 279 |
+
msg = "'[<>]' not supported between instances of .*"
|
| 280 |
+
with pytest.raises(TypeError, match=msg):
|
| 281 |
+
algos.factorize(x17[::-1], sort=True)
|
| 282 |
+
|
| 283 |
+
def test_numeric_dtype_factorize(self, any_real_numpy_dtype):
|
| 284 |
+
# GH41132
|
| 285 |
+
dtype = any_real_numpy_dtype
|
| 286 |
+
data = np.array([1, 2, 2, 1], dtype=dtype)
|
| 287 |
+
expected_codes = np.array([0, 1, 1, 0], dtype=np.intp)
|
| 288 |
+
expected_uniques = np.array([1, 2], dtype=dtype)
|
| 289 |
+
|
| 290 |
+
codes, uniques = algos.factorize(data)
|
| 291 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 292 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 293 |
+
|
| 294 |
+
def test_float64_factorize(self, writable):
|
| 295 |
+
data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
|
| 296 |
+
data.setflags(write=writable)
|
| 297 |
+
expected_codes = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp)
|
| 298 |
+
expected_uniques = np.array([1.0, 1e8, 1e-8], dtype=np.float64)
|
| 299 |
+
|
| 300 |
+
codes, uniques = algos.factorize(data)
|
| 301 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 302 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 303 |
+
|
| 304 |
+
def test_uint64_factorize(self, writable):
|
| 305 |
+
data = np.array([2**64 - 1, 1, 2**64 - 1], dtype=np.uint64)
|
| 306 |
+
data.setflags(write=writable)
|
| 307 |
+
expected_codes = np.array([0, 1, 0], dtype=np.intp)
|
| 308 |
+
expected_uniques = np.array([2**64 - 1, 1], dtype=np.uint64)
|
| 309 |
+
|
| 310 |
+
codes, uniques = algos.factorize(data)
|
| 311 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 312 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 313 |
+
|
| 314 |
+
def test_int64_factorize(self, writable):
|
| 315 |
+
data = np.array([2**63 - 1, -(2**63), 2**63 - 1], dtype=np.int64)
|
| 316 |
+
data.setflags(write=writable)
|
| 317 |
+
expected_codes = np.array([0, 1, 0], dtype=np.intp)
|
| 318 |
+
expected_uniques = np.array([2**63 - 1, -(2**63)], dtype=np.int64)
|
| 319 |
+
|
| 320 |
+
codes, uniques = algos.factorize(data)
|
| 321 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 322 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 323 |
+
|
| 324 |
+
def test_string_factorize(self, writable):
|
| 325 |
+
data = np.array(["a", "c", "a", "b", "c"], dtype=object)
|
| 326 |
+
data.setflags(write=writable)
|
| 327 |
+
expected_codes = np.array([0, 1, 0, 2, 1], dtype=np.intp)
|
| 328 |
+
expected_uniques = np.array(["a", "c", "b"], dtype=object)
|
| 329 |
+
|
| 330 |
+
codes, uniques = algos.factorize(data)
|
| 331 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 332 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 333 |
+
|
| 334 |
+
def test_object_factorize(self, writable):
|
| 335 |
+
data = np.array(["a", "c", None, np.nan, "a", "b", NaT, "c"], dtype=object)
|
| 336 |
+
data.setflags(write=writable)
|
| 337 |
+
expected_codes = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp)
|
| 338 |
+
expected_uniques = np.array(["a", "c", "b"], dtype=object)
|
| 339 |
+
|
| 340 |
+
codes, uniques = algos.factorize(data)
|
| 341 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 342 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 343 |
+
|
| 344 |
+
def test_datetime64_factorize(self, writable):
|
| 345 |
+
# GH35650 Verify whether read-only datetime64 array can be factorized
|
| 346 |
+
data = np.array([np.datetime64("2020-01-01T00:00:00.000")], dtype="M8[ns]")
|
| 347 |
+
data.setflags(write=writable)
|
| 348 |
+
expected_codes = np.array([0], dtype=np.intp)
|
| 349 |
+
expected_uniques = np.array(
|
| 350 |
+
["2020-01-01T00:00:00.000000000"], dtype="datetime64[ns]"
|
| 351 |
+
)
|
| 352 |
+
|
| 353 |
+
codes, uniques = pd.factorize(data)
|
| 354 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 355 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 356 |
+
|
| 357 |
+
@pytest.mark.parametrize("sort", [True, False])
|
| 358 |
+
def test_factorize_rangeindex(self, sort):
|
| 359 |
+
# increasing -> sort doesn't matter
|
| 360 |
+
ri = pd.RangeIndex.from_range(range(10))
|
| 361 |
+
expected = np.arange(10, dtype=np.intp), ri
|
| 362 |
+
|
| 363 |
+
result = algos.factorize(ri, sort=sort)
|
| 364 |
+
tm.assert_numpy_array_equal(result[0], expected[0])
|
| 365 |
+
tm.assert_index_equal(result[1], expected[1], exact=True)
|
| 366 |
+
|
| 367 |
+
result = ri.factorize(sort=sort)
|
| 368 |
+
tm.assert_numpy_array_equal(result[0], expected[0])
|
| 369 |
+
tm.assert_index_equal(result[1], expected[1], exact=True)
|
| 370 |
+
|
| 371 |
+
@pytest.mark.parametrize("sort", [True, False])
|
| 372 |
+
def test_factorize_rangeindex_decreasing(self, sort):
|
| 373 |
+
# decreasing -> sort matters
|
| 374 |
+
ri = pd.RangeIndex.from_range(range(10))
|
| 375 |
+
expected = np.arange(10, dtype=np.intp), ri
|
| 376 |
+
|
| 377 |
+
ri2 = ri[::-1]
|
| 378 |
+
expected = expected[0], ri2
|
| 379 |
+
if sort:
|
| 380 |
+
expected = expected[0][::-1], expected[1][::-1]
|
| 381 |
+
|
| 382 |
+
result = algos.factorize(ri2, sort=sort)
|
| 383 |
+
tm.assert_numpy_array_equal(result[0], expected[0])
|
| 384 |
+
tm.assert_index_equal(result[1], expected[1], exact=True)
|
| 385 |
+
|
| 386 |
+
result = ri2.factorize(sort=sort)
|
| 387 |
+
tm.assert_numpy_array_equal(result[0], expected[0])
|
| 388 |
+
tm.assert_index_equal(result[1], expected[1], exact=True)
|
| 389 |
+
|
| 390 |
+
def test_deprecate_order(self):
|
| 391 |
+
# gh 19727 - check warning is raised for deprecated keyword, order.
|
| 392 |
+
# Test not valid once order keyword is removed.
|
| 393 |
+
data = np.array([2**63, 1, 2**63], dtype=np.uint64)
|
| 394 |
+
with pytest.raises(TypeError, match="got an unexpected keyword"):
|
| 395 |
+
algos.factorize(data, order=True)
|
| 396 |
+
with tm.assert_produces_warning(False):
|
| 397 |
+
algos.factorize(data)
|
| 398 |
+
|
| 399 |
+
@pytest.mark.parametrize(
|
| 400 |
+
"data",
|
| 401 |
+
[
|
| 402 |
+
np.array([0, 1, 0], dtype="u8"),
|
| 403 |
+
np.array([-(2**63), 1, -(2**63)], dtype="i8"),
|
| 404 |
+
np.array(["__nan__", "foo", "__nan__"], dtype="object"),
|
| 405 |
+
],
|
| 406 |
+
)
|
| 407 |
+
def test_parametrized_factorize_na_value_default(self, data):
|
| 408 |
+
# arrays that include the NA default for that type, but isn't used.
|
| 409 |
+
codes, uniques = algos.factorize(data)
|
| 410 |
+
expected_uniques = data[[0, 1]]
|
| 411 |
+
expected_codes = np.array([0, 1, 0], dtype=np.intp)
|
| 412 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 413 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 414 |
+
|
| 415 |
+
@pytest.mark.parametrize(
|
| 416 |
+
"data, na_value",
|
| 417 |
+
[
|
| 418 |
+
(np.array([0, 1, 0, 2], dtype="u8"), 0),
|
| 419 |
+
(np.array([1, 0, 1, 2], dtype="u8"), 1),
|
| 420 |
+
(np.array([-(2**63), 1, -(2**63), 0], dtype="i8"), -(2**63)),
|
| 421 |
+
(np.array([1, -(2**63), 1, 0], dtype="i8"), 1),
|
| 422 |
+
(np.array(["a", "", "a", "b"], dtype=object), "a"),
|
| 423 |
+
(np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()),
|
| 424 |
+
(np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)),
|
| 425 |
+
],
|
| 426 |
+
)
|
| 427 |
+
def test_parametrized_factorize_na_value(self, data, na_value):
|
| 428 |
+
codes, uniques = algos.factorize_array(data, na_value=na_value)
|
| 429 |
+
expected_uniques = data[[1, 3]]
|
| 430 |
+
expected_codes = np.array([-1, 0, -1, 1], dtype=np.intp)
|
| 431 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 432 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 433 |
+
|
| 434 |
+
@pytest.mark.parametrize("sort", [True, False])
|
| 435 |
+
@pytest.mark.parametrize(
|
| 436 |
+
"data, uniques",
|
| 437 |
+
[
|
| 438 |
+
(
|
| 439 |
+
np.array(["b", "a", None, "b"], dtype=object),
|
| 440 |
+
np.array(["b", "a"], dtype=object),
|
| 441 |
+
),
|
| 442 |
+
(
|
| 443 |
+
pd.array([2, 1, np.nan, 2], dtype="Int64"),
|
| 444 |
+
pd.array([2, 1], dtype="Int64"),
|
| 445 |
+
),
|
| 446 |
+
],
|
| 447 |
+
ids=["numpy_array", "extension_array"],
|
| 448 |
+
)
|
| 449 |
+
def test_factorize_use_na_sentinel(self, sort, data, uniques):
|
| 450 |
+
codes, uniques = algos.factorize(data, sort=sort, use_na_sentinel=True)
|
| 451 |
+
if sort:
|
| 452 |
+
expected_codes = np.array([1, 0, -1, 1], dtype=np.intp)
|
| 453 |
+
expected_uniques = algos.safe_sort(uniques)
|
| 454 |
+
else:
|
| 455 |
+
expected_codes = np.array([0, 1, -1, 0], dtype=np.intp)
|
| 456 |
+
expected_uniques = uniques
|
| 457 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 458 |
+
if isinstance(data, np.ndarray):
|
| 459 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques)
|
| 460 |
+
else:
|
| 461 |
+
tm.assert_extension_array_equal(uniques, expected_uniques)
|
| 462 |
+
|
| 463 |
+
@pytest.mark.parametrize(
|
| 464 |
+
"data, expected_codes, expected_uniques",
|
| 465 |
+
[
|
| 466 |
+
(
|
| 467 |
+
["a", None, "b", "a"],
|
| 468 |
+
np.array([0, 1, 2, 0], dtype=np.dtype("intp")),
|
| 469 |
+
np.array(["a", np.nan, "b"], dtype=object),
|
| 470 |
+
),
|
| 471 |
+
(
|
| 472 |
+
["a", np.nan, "b", "a"],
|
| 473 |
+
np.array([0, 1, 2, 0], dtype=np.dtype("intp")),
|
| 474 |
+
np.array(["a", np.nan, "b"], dtype=object),
|
| 475 |
+
),
|
| 476 |
+
],
|
| 477 |
+
)
|
| 478 |
+
def test_object_factorize_use_na_sentinel_false(
|
| 479 |
+
self, data, expected_codes, expected_uniques
|
| 480 |
+
):
|
| 481 |
+
codes, uniques = algos.factorize(
|
| 482 |
+
np.array(data, dtype=object), use_na_sentinel=False
|
| 483 |
+
)
|
| 484 |
+
|
| 485 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques, strict_nan=True)
|
| 486 |
+
tm.assert_numpy_array_equal(codes, expected_codes, strict_nan=True)
|
| 487 |
+
|
| 488 |
+
@pytest.mark.parametrize(
|
| 489 |
+
"data, expected_codes, expected_uniques",
|
| 490 |
+
[
|
| 491 |
+
(
|
| 492 |
+
[1, None, 1, 2],
|
| 493 |
+
np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
|
| 494 |
+
np.array([1, np.nan, 2], dtype="O"),
|
| 495 |
+
),
|
| 496 |
+
(
|
| 497 |
+
[1, np.nan, 1, 2],
|
| 498 |
+
np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
|
| 499 |
+
np.array([1, np.nan, 2], dtype=np.float64),
|
| 500 |
+
),
|
| 501 |
+
],
|
| 502 |
+
)
|
| 503 |
+
def test_int_factorize_use_na_sentinel_false(
|
| 504 |
+
self, data, expected_codes, expected_uniques
|
| 505 |
+
):
|
| 506 |
+
msg = "factorize with argument that is not not a Series"
|
| 507 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 508 |
+
codes, uniques = algos.factorize(data, use_na_sentinel=False)
|
| 509 |
+
|
| 510 |
+
tm.assert_numpy_array_equal(uniques, expected_uniques, strict_nan=True)
|
| 511 |
+
tm.assert_numpy_array_equal(codes, expected_codes, strict_nan=True)
|
| 512 |
+
|
| 513 |
+
@pytest.mark.parametrize(
|
| 514 |
+
"data, expected_codes, expected_uniques",
|
| 515 |
+
[
|
| 516 |
+
(
|
| 517 |
+
Index(Categorical(["a", "a", "b"])),
|
| 518 |
+
np.array([0, 0, 1], dtype=np.intp),
|
| 519 |
+
CategoricalIndex(["a", "b"], categories=["a", "b"], dtype="category"),
|
| 520 |
+
),
|
| 521 |
+
(
|
| 522 |
+
Series(Categorical(["a", "a", "b"])),
|
| 523 |
+
np.array([0, 0, 1], dtype=np.intp),
|
| 524 |
+
CategoricalIndex(["a", "b"], categories=["a", "b"], dtype="category"),
|
| 525 |
+
),
|
| 526 |
+
(
|
| 527 |
+
Series(DatetimeIndex(["2017", "2017"], tz="US/Eastern")),
|
| 528 |
+
np.array([0, 0], dtype=np.intp),
|
| 529 |
+
DatetimeIndex(["2017"], tz="US/Eastern"),
|
| 530 |
+
),
|
| 531 |
+
],
|
| 532 |
+
)
|
| 533 |
+
def test_factorize_mixed_values(self, data, expected_codes, expected_uniques):
|
| 534 |
+
# GH 19721
|
| 535 |
+
codes, uniques = algos.factorize(data)
|
| 536 |
+
tm.assert_numpy_array_equal(codes, expected_codes)
|
| 537 |
+
tm.assert_index_equal(uniques, expected_uniques)
|
| 538 |
+
|
| 539 |
+
def test_factorize_interval_non_nano(self, unit):
|
| 540 |
+
# GH#56099
|
| 541 |
+
left = DatetimeIndex(["2016-01-01", np.nan, "2015-10-11"]).as_unit(unit)
|
| 542 |
+
right = DatetimeIndex(["2016-01-02", np.nan, "2015-10-15"]).as_unit(unit)
|
| 543 |
+
idx = IntervalIndex.from_arrays(left, right)
|
| 544 |
+
codes, cats = idx.factorize()
|
| 545 |
+
assert cats.dtype == f"interval[datetime64[{unit}], right]"
|
| 546 |
+
|
| 547 |
+
ts = Timestamp(0).as_unit(unit)
|
| 548 |
+
idx2 = IntervalIndex.from_arrays(left - ts, right - ts)
|
| 549 |
+
codes2, cats2 = idx2.factorize()
|
| 550 |
+
assert cats2.dtype == f"interval[timedelta64[{unit}], right]"
|
| 551 |
+
|
| 552 |
+
idx3 = IntervalIndex.from_arrays(
|
| 553 |
+
left.tz_localize("US/Pacific"), right.tz_localize("US/Pacific")
|
| 554 |
+
)
|
| 555 |
+
codes3, cats3 = idx3.factorize()
|
| 556 |
+
assert cats3.dtype == f"interval[datetime64[{unit}, US/Pacific], right]"
|
| 557 |
+
|
| 558 |
+
|
| 559 |
+
class TestUnique:
|
| 560 |
+
def test_ints(self):
|
| 561 |
+
arr = np.random.default_rng(2).integers(0, 100, size=50)
|
| 562 |
+
|
| 563 |
+
result = algos.unique(arr)
|
| 564 |
+
assert isinstance(result, np.ndarray)
|
| 565 |
+
|
| 566 |
+
def test_objects(self):
|
| 567 |
+
arr = np.random.default_rng(2).integers(0, 100, size=50).astype("O")
|
| 568 |
+
|
| 569 |
+
result = algos.unique(arr)
|
| 570 |
+
assert isinstance(result, np.ndarray)
|
| 571 |
+
|
| 572 |
+
def test_object_refcount_bug(self):
|
| 573 |
+
lst = np.array(["A", "B", "C", "D", "E"], dtype=object)
|
| 574 |
+
for i in range(1000):
|
| 575 |
+
len(algos.unique(lst))
|
| 576 |
+
|
| 577 |
+
def test_on_index_object(self):
|
| 578 |
+
mindex = MultiIndex.from_arrays(
|
| 579 |
+
[np.arange(5).repeat(5), np.tile(np.arange(5), 5)]
|
| 580 |
+
)
|
| 581 |
+
expected = mindex.values
|
| 582 |
+
expected.sort()
|
| 583 |
+
|
| 584 |
+
mindex = mindex.repeat(2)
|
| 585 |
+
|
| 586 |
+
result = pd.unique(mindex)
|
| 587 |
+
result.sort()
|
| 588 |
+
|
| 589 |
+
tm.assert_almost_equal(result, expected)
|
| 590 |
+
|
| 591 |
+
def test_dtype_preservation(self, any_numpy_dtype):
|
| 592 |
+
# GH 15442
|
| 593 |
+
if any_numpy_dtype in (tm.BYTES_DTYPES + tm.STRING_DTYPES):
|
| 594 |
+
data = [1, 2, 2]
|
| 595 |
+
uniques = [1, 2]
|
| 596 |
+
elif is_integer_dtype(any_numpy_dtype):
|
| 597 |
+
data = [1, 2, 2]
|
| 598 |
+
uniques = [1, 2]
|
| 599 |
+
elif is_float_dtype(any_numpy_dtype):
|
| 600 |
+
data = [1, 2, 2]
|
| 601 |
+
uniques = [1.0, 2.0]
|
| 602 |
+
elif is_complex_dtype(any_numpy_dtype):
|
| 603 |
+
data = [complex(1, 0), complex(2, 0), complex(2, 0)]
|
| 604 |
+
uniques = [complex(1, 0), complex(2, 0)]
|
| 605 |
+
elif is_bool_dtype(any_numpy_dtype):
|
| 606 |
+
data = [True, True, False]
|
| 607 |
+
uniques = [True, False]
|
| 608 |
+
elif is_object_dtype(any_numpy_dtype):
|
| 609 |
+
data = ["A", "B", "B"]
|
| 610 |
+
uniques = ["A", "B"]
|
| 611 |
+
else:
|
| 612 |
+
# datetime64[ns]/M8[ns]/timedelta64[ns]/m8[ns] tested elsewhere
|
| 613 |
+
data = [1, 2, 2]
|
| 614 |
+
uniques = [1, 2]
|
| 615 |
+
|
| 616 |
+
result = Series(data, dtype=any_numpy_dtype).unique()
|
| 617 |
+
expected = np.array(uniques, dtype=any_numpy_dtype)
|
| 618 |
+
|
| 619 |
+
if any_numpy_dtype in tm.STRING_DTYPES:
|
| 620 |
+
expected = expected.astype(object)
|
| 621 |
+
|
| 622 |
+
if expected.dtype.kind in ["m", "M"]:
|
| 623 |
+
# We get TimedeltaArray/DatetimeArray
|
| 624 |
+
assert isinstance(result, (DatetimeArray, TimedeltaArray))
|
| 625 |
+
result = np.array(result)
|
| 626 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 627 |
+
|
| 628 |
+
def test_datetime64_dtype_array_returned(self):
|
| 629 |
+
# GH 9431
|
| 630 |
+
expected = np.array(
|
| 631 |
+
[
|
| 632 |
+
"2015-01-03T00:00:00.000000000",
|
| 633 |
+
"2015-01-01T00:00:00.000000000",
|
| 634 |
+
],
|
| 635 |
+
dtype="M8[ns]",
|
| 636 |
+
)
|
| 637 |
+
|
| 638 |
+
dt_index = to_datetime(
|
| 639 |
+
[
|
| 640 |
+
"2015-01-03T00:00:00.000000000",
|
| 641 |
+
"2015-01-01T00:00:00.000000000",
|
| 642 |
+
"2015-01-01T00:00:00.000000000",
|
| 643 |
+
]
|
| 644 |
+
)
|
| 645 |
+
result = algos.unique(dt_index)
|
| 646 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 647 |
+
assert result.dtype == expected.dtype
|
| 648 |
+
|
| 649 |
+
s = Series(dt_index)
|
| 650 |
+
result = algos.unique(s)
|
| 651 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 652 |
+
assert result.dtype == expected.dtype
|
| 653 |
+
|
| 654 |
+
arr = s.values
|
| 655 |
+
result = algos.unique(arr)
|
| 656 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 657 |
+
assert result.dtype == expected.dtype
|
| 658 |
+
|
| 659 |
+
def test_datetime_non_ns(self):
|
| 660 |
+
a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]")
|
| 661 |
+
result = pd.unique(a)
|
| 662 |
+
expected = np.array(["2000", "2001"], dtype="datetime64[s]")
|
| 663 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 664 |
+
|
| 665 |
+
def test_timedelta_non_ns(self):
|
| 666 |
+
a = np.array(["2000", "2000", "2001"], dtype="timedelta64[s]")
|
| 667 |
+
result = pd.unique(a)
|
| 668 |
+
expected = np.array([2000, 2001], dtype="timedelta64[s]")
|
| 669 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 670 |
+
|
| 671 |
+
def test_timedelta64_dtype_array_returned(self):
|
| 672 |
+
# GH 9431
|
| 673 |
+
expected = np.array([31200, 45678, 10000], dtype="m8[ns]")
|
| 674 |
+
|
| 675 |
+
td_index = to_timedelta([31200, 45678, 31200, 10000, 45678])
|
| 676 |
+
result = algos.unique(td_index)
|
| 677 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 678 |
+
assert result.dtype == expected.dtype
|
| 679 |
+
|
| 680 |
+
s = Series(td_index)
|
| 681 |
+
result = algos.unique(s)
|
| 682 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 683 |
+
assert result.dtype == expected.dtype
|
| 684 |
+
|
| 685 |
+
arr = s.values
|
| 686 |
+
result = algos.unique(arr)
|
| 687 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 688 |
+
assert result.dtype == expected.dtype
|
| 689 |
+
|
| 690 |
+
def test_uint64_overflow(self):
|
| 691 |
+
s = Series([1, 2, 2**63, 2**63], dtype=np.uint64)
|
| 692 |
+
exp = np.array([1, 2, 2**63], dtype=np.uint64)
|
| 693 |
+
tm.assert_numpy_array_equal(algos.unique(s), exp)
|
| 694 |
+
|
| 695 |
+
def test_nan_in_object_array(self):
|
| 696 |
+
duplicated_items = ["a", np.nan, "c", "c"]
|
| 697 |
+
result = pd.unique(np.array(duplicated_items, dtype=object))
|
| 698 |
+
expected = np.array(["a", np.nan, "c"], dtype=object)
|
| 699 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 700 |
+
|
| 701 |
+
def test_categorical(self):
|
| 702 |
+
# we are expecting to return in the order
|
| 703 |
+
# of appearance
|
| 704 |
+
expected = Categorical(list("bac"))
|
| 705 |
+
|
| 706 |
+
# we are expecting to return in the order
|
| 707 |
+
# of the categories
|
| 708 |
+
expected_o = Categorical(list("bac"), categories=list("abc"), ordered=True)
|
| 709 |
+
|
| 710 |
+
# GH 15939
|
| 711 |
+
c = Categorical(list("baabc"))
|
| 712 |
+
result = c.unique()
|
| 713 |
+
tm.assert_categorical_equal(result, expected)
|
| 714 |
+
|
| 715 |
+
result = algos.unique(c)
|
| 716 |
+
tm.assert_categorical_equal(result, expected)
|
| 717 |
+
|
| 718 |
+
c = Categorical(list("baabc"), ordered=True)
|
| 719 |
+
result = c.unique()
|
| 720 |
+
tm.assert_categorical_equal(result, expected_o)
|
| 721 |
+
|
| 722 |
+
result = algos.unique(c)
|
| 723 |
+
tm.assert_categorical_equal(result, expected_o)
|
| 724 |
+
|
| 725 |
+
# Series of categorical dtype
|
| 726 |
+
s = Series(Categorical(list("baabc")), name="foo")
|
| 727 |
+
result = s.unique()
|
| 728 |
+
tm.assert_categorical_equal(result, expected)
|
| 729 |
+
|
| 730 |
+
result = pd.unique(s)
|
| 731 |
+
tm.assert_categorical_equal(result, expected)
|
| 732 |
+
|
| 733 |
+
# CI -> return CI
|
| 734 |
+
ci = CategoricalIndex(Categorical(list("baabc"), categories=list("abc")))
|
| 735 |
+
expected = CategoricalIndex(expected)
|
| 736 |
+
result = ci.unique()
|
| 737 |
+
tm.assert_index_equal(result, expected)
|
| 738 |
+
|
| 739 |
+
result = pd.unique(ci)
|
| 740 |
+
tm.assert_index_equal(result, expected)
|
| 741 |
+
|
| 742 |
+
def test_datetime64tz_aware(self, unit):
|
| 743 |
+
# GH 15939
|
| 744 |
+
|
| 745 |
+
dti = Index(
|
| 746 |
+
[
|
| 747 |
+
Timestamp("20160101", tz="US/Eastern"),
|
| 748 |
+
Timestamp("20160101", tz="US/Eastern"),
|
| 749 |
+
]
|
| 750 |
+
).as_unit(unit)
|
| 751 |
+
ser = Series(dti)
|
| 752 |
+
|
| 753 |
+
result = ser.unique()
|
| 754 |
+
expected = dti[:1]._data
|
| 755 |
+
tm.assert_extension_array_equal(result, expected)
|
| 756 |
+
|
| 757 |
+
result = dti.unique()
|
| 758 |
+
expected = dti[:1]
|
| 759 |
+
tm.assert_index_equal(result, expected)
|
| 760 |
+
|
| 761 |
+
result = pd.unique(ser)
|
| 762 |
+
expected = dti[:1]._data
|
| 763 |
+
tm.assert_extension_array_equal(result, expected)
|
| 764 |
+
|
| 765 |
+
result = pd.unique(dti)
|
| 766 |
+
expected = dti[:1]
|
| 767 |
+
tm.assert_index_equal(result, expected)
|
| 768 |
+
|
| 769 |
+
def test_order_of_appearance(self):
|
| 770 |
+
# 9346
|
| 771 |
+
# light testing of guarantee of order of appearance
|
| 772 |
+
# these also are the doc-examples
|
| 773 |
+
result = pd.unique(Series([2, 1, 3, 3]))
|
| 774 |
+
tm.assert_numpy_array_equal(result, np.array([2, 1, 3], dtype="int64"))
|
| 775 |
+
|
| 776 |
+
result = pd.unique(Series([2] + [1] * 5))
|
| 777 |
+
tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64"))
|
| 778 |
+
|
| 779 |
+
msg = "unique with argument that is not not a Series, Index,"
|
| 780 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 781 |
+
result = pd.unique(list("aabc"))
|
| 782 |
+
expected = np.array(["a", "b", "c"], dtype=object)
|
| 783 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 784 |
+
|
| 785 |
+
result = pd.unique(Series(Categorical(list("aabc"))))
|
| 786 |
+
expected = Categorical(list("abc"))
|
| 787 |
+
tm.assert_categorical_equal(result, expected)
|
| 788 |
+
|
| 789 |
+
def test_order_of_appearance_dt64(self, unit):
|
| 790 |
+
ser = Series([Timestamp("20160101"), Timestamp("20160101")]).dt.as_unit(unit)
|
| 791 |
+
result = pd.unique(ser)
|
| 792 |
+
expected = np.array(["2016-01-01T00:00:00.000000000"], dtype=f"M8[{unit}]")
|
| 793 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 794 |
+
|
| 795 |
+
def test_order_of_appearance_dt64tz(self, unit):
|
| 796 |
+
dti = DatetimeIndex(
|
| 797 |
+
[
|
| 798 |
+
Timestamp("20160101", tz="US/Eastern"),
|
| 799 |
+
Timestamp("20160101", tz="US/Eastern"),
|
| 800 |
+
]
|
| 801 |
+
).as_unit(unit)
|
| 802 |
+
result = pd.unique(dti)
|
| 803 |
+
expected = DatetimeIndex(
|
| 804 |
+
["2016-01-01 00:00:00"], dtype=f"datetime64[{unit}, US/Eastern]", freq=None
|
| 805 |
+
)
|
| 806 |
+
tm.assert_index_equal(result, expected)
|
| 807 |
+
|
| 808 |
+
@pytest.mark.parametrize(
|
| 809 |
+
"arg ,expected",
|
| 810 |
+
[
|
| 811 |
+
(("1", "1", "2"), np.array(["1", "2"], dtype=object)),
|
| 812 |
+
(("foo",), np.array(["foo"], dtype=object)),
|
| 813 |
+
],
|
| 814 |
+
)
|
| 815 |
+
def test_tuple_with_strings(self, arg, expected):
|
| 816 |
+
# see GH 17108
|
| 817 |
+
msg = "unique with argument that is not not a Series"
|
| 818 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 819 |
+
result = pd.unique(arg)
|
| 820 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 821 |
+
|
| 822 |
+
def test_obj_none_preservation(self):
|
| 823 |
+
# GH 20866
|
| 824 |
+
arr = np.array(["foo", None], dtype=object)
|
| 825 |
+
result = pd.unique(arr)
|
| 826 |
+
expected = np.array(["foo", None], dtype=object)
|
| 827 |
+
|
| 828 |
+
tm.assert_numpy_array_equal(result, expected, strict_nan=True)
|
| 829 |
+
|
| 830 |
+
def test_signed_zero(self):
|
| 831 |
+
# GH 21866
|
| 832 |
+
a = np.array([-0.0, 0.0])
|
| 833 |
+
result = pd.unique(a)
|
| 834 |
+
expected = np.array([-0.0]) # 0.0 and -0.0 are equivalent
|
| 835 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 836 |
+
|
| 837 |
+
def test_different_nans(self):
|
| 838 |
+
# GH 21866
|
| 839 |
+
# create different nans from bit-patterns:
|
| 840 |
+
NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
|
| 841 |
+
NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
|
| 842 |
+
assert NAN1 != NAN1
|
| 843 |
+
assert NAN2 != NAN2
|
| 844 |
+
a = np.array([NAN1, NAN2]) # NAN1 and NAN2 are equivalent
|
| 845 |
+
result = pd.unique(a)
|
| 846 |
+
expected = np.array([np.nan])
|
| 847 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 848 |
+
|
| 849 |
+
@pytest.mark.parametrize("el_type", [np.float64, object])
|
| 850 |
+
def test_first_nan_kept(self, el_type):
|
| 851 |
+
# GH 22295
|
| 852 |
+
# create different nans from bit-patterns:
|
| 853 |
+
bits_for_nan1 = 0xFFF8000000000001
|
| 854 |
+
bits_for_nan2 = 0x7FF8000000000001
|
| 855 |
+
NAN1 = struct.unpack("d", struct.pack("=Q", bits_for_nan1))[0]
|
| 856 |
+
NAN2 = struct.unpack("d", struct.pack("=Q", bits_for_nan2))[0]
|
| 857 |
+
assert NAN1 != NAN1
|
| 858 |
+
assert NAN2 != NAN2
|
| 859 |
+
a = np.array([NAN1, NAN2], dtype=el_type)
|
| 860 |
+
result = pd.unique(a)
|
| 861 |
+
assert result.size == 1
|
| 862 |
+
# use bit patterns to identify which nan was kept:
|
| 863 |
+
result_nan_bits = struct.unpack("=Q", struct.pack("d", result[0]))[0]
|
| 864 |
+
assert result_nan_bits == bits_for_nan1
|
| 865 |
+
|
| 866 |
+
def test_do_not_mangle_na_values(self, unique_nulls_fixture, unique_nulls_fixture2):
|
| 867 |
+
# GH 22295
|
| 868 |
+
if unique_nulls_fixture is unique_nulls_fixture2:
|
| 869 |
+
return # skip it, values not unique
|
| 870 |
+
a = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object)
|
| 871 |
+
result = pd.unique(a)
|
| 872 |
+
assert result.size == 2
|
| 873 |
+
assert a[0] is unique_nulls_fixture
|
| 874 |
+
assert a[1] is unique_nulls_fixture2
|
| 875 |
+
|
| 876 |
+
def test_unique_masked(self, any_numeric_ea_dtype):
|
| 877 |
+
# GH#48019
|
| 878 |
+
ser = Series([1, pd.NA, 2] * 3, dtype=any_numeric_ea_dtype)
|
| 879 |
+
result = pd.unique(ser)
|
| 880 |
+
expected = pd.array([1, pd.NA, 2], dtype=any_numeric_ea_dtype)
|
| 881 |
+
tm.assert_extension_array_equal(result, expected)
|
| 882 |
+
|
| 883 |
+
|
| 884 |
+
def test_nunique_ints(index_or_series_or_array):
|
| 885 |
+
# GH#36327
|
| 886 |
+
values = index_or_series_or_array(np.random.default_rng(2).integers(0, 20, 30))
|
| 887 |
+
result = algos.nunique_ints(values)
|
| 888 |
+
expected = len(algos.unique(values))
|
| 889 |
+
assert result == expected
|
| 890 |
+
|
| 891 |
+
|
| 892 |
+
class TestIsin:
|
| 893 |
+
def test_invalid(self):
|
| 894 |
+
msg = (
|
| 895 |
+
r"only list-like objects are allowed to be passed to isin\(\), "
|
| 896 |
+
r"you passed a `int`"
|
| 897 |
+
)
|
| 898 |
+
with pytest.raises(TypeError, match=msg):
|
| 899 |
+
algos.isin(1, 1)
|
| 900 |
+
with pytest.raises(TypeError, match=msg):
|
| 901 |
+
algos.isin(1, [1])
|
| 902 |
+
with pytest.raises(TypeError, match=msg):
|
| 903 |
+
algos.isin([1], 1)
|
| 904 |
+
|
| 905 |
+
def test_basic(self):
|
| 906 |
+
msg = "isin with argument that is not not a Series"
|
| 907 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 908 |
+
result = algos.isin([1, 2], [1])
|
| 909 |
+
expected = np.array([True, False])
|
| 910 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 911 |
+
|
| 912 |
+
result = algos.isin(np.array([1, 2]), [1])
|
| 913 |
+
expected = np.array([True, False])
|
| 914 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 915 |
+
|
| 916 |
+
result = algos.isin(Series([1, 2]), [1])
|
| 917 |
+
expected = np.array([True, False])
|
| 918 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 919 |
+
|
| 920 |
+
result = algos.isin(Series([1, 2]), Series([1]))
|
| 921 |
+
expected = np.array([True, False])
|
| 922 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 923 |
+
|
| 924 |
+
result = algos.isin(Series([1, 2]), {1})
|
| 925 |
+
expected = np.array([True, False])
|
| 926 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 927 |
+
|
| 928 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 929 |
+
result = algos.isin(["a", "b"], ["a"])
|
| 930 |
+
expected = np.array([True, False])
|
| 931 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 932 |
+
|
| 933 |
+
result = algos.isin(Series(["a", "b"]), Series(["a"]))
|
| 934 |
+
expected = np.array([True, False])
|
| 935 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 936 |
+
|
| 937 |
+
result = algos.isin(Series(["a", "b"]), {"a"})
|
| 938 |
+
expected = np.array([True, False])
|
| 939 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 940 |
+
|
| 941 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 942 |
+
result = algos.isin(["a", "b"], [1])
|
| 943 |
+
expected = np.array([False, False])
|
| 944 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 945 |
+
|
| 946 |
+
def test_i8(self):
|
| 947 |
+
arr = date_range("20130101", periods=3).values
|
| 948 |
+
result = algos.isin(arr, [arr[0]])
|
| 949 |
+
expected = np.array([True, False, False])
|
| 950 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 951 |
+
|
| 952 |
+
result = algos.isin(arr, arr[0:2])
|
| 953 |
+
expected = np.array([True, True, False])
|
| 954 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 955 |
+
|
| 956 |
+
result = algos.isin(arr, set(arr[0:2]))
|
| 957 |
+
expected = np.array([True, True, False])
|
| 958 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 959 |
+
|
| 960 |
+
arr = timedelta_range("1 day", periods=3).values
|
| 961 |
+
result = algos.isin(arr, [arr[0]])
|
| 962 |
+
expected = np.array([True, False, False])
|
| 963 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 964 |
+
|
| 965 |
+
result = algos.isin(arr, arr[0:2])
|
| 966 |
+
expected = np.array([True, True, False])
|
| 967 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 968 |
+
|
| 969 |
+
result = algos.isin(arr, set(arr[0:2]))
|
| 970 |
+
expected = np.array([True, True, False])
|
| 971 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 972 |
+
|
| 973 |
+
@pytest.mark.parametrize("dtype1", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"])
|
| 974 |
+
@pytest.mark.parametrize("dtype", ["i8", "f8", "u8"])
|
| 975 |
+
def test_isin_datetimelike_values_numeric_comps(self, dtype, dtype1):
|
| 976 |
+
# Anything but object and we get all-False shortcut
|
| 977 |
+
|
| 978 |
+
dta = date_range("2013-01-01", periods=3)._values
|
| 979 |
+
arr = Series(dta.view("i8")).array.view(dtype1)
|
| 980 |
+
|
| 981 |
+
comps = arr.view("i8").astype(dtype)
|
| 982 |
+
|
| 983 |
+
result = algos.isin(comps, arr)
|
| 984 |
+
expected = np.zeros(comps.shape, dtype=bool)
|
| 985 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 986 |
+
|
| 987 |
+
def test_large(self):
|
| 988 |
+
s = date_range("20000101", periods=2000000, freq="s").values
|
| 989 |
+
result = algos.isin(s, s[0:2])
|
| 990 |
+
expected = np.zeros(len(s), dtype=bool)
|
| 991 |
+
expected[0] = True
|
| 992 |
+
expected[1] = True
|
| 993 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 994 |
+
|
| 995 |
+
@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"])
|
| 996 |
+
def test_isin_datetimelike_all_nat(self, dtype):
|
| 997 |
+
# GH#56427
|
| 998 |
+
dta = date_range("2013-01-01", periods=3)._values
|
| 999 |
+
arr = Series(dta.view("i8")).array.view(dtype)
|
| 1000 |
+
|
| 1001 |
+
arr[0] = NaT
|
| 1002 |
+
result = algos.isin(arr, [NaT])
|
| 1003 |
+
expected = np.array([True, False, False], dtype=bool)
|
| 1004 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1005 |
+
|
| 1006 |
+
@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]"])
|
| 1007 |
+
def test_isin_datetimelike_strings_deprecated(self, dtype):
|
| 1008 |
+
# GH#53111
|
| 1009 |
+
dta = date_range("2013-01-01", periods=3)._values
|
| 1010 |
+
arr = Series(dta.view("i8")).array.view(dtype)
|
| 1011 |
+
|
| 1012 |
+
vals = [str(x) for x in arr]
|
| 1013 |
+
msg = "The behavior of 'isin' with dtype=.* is deprecated"
|
| 1014 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1015 |
+
res = algos.isin(arr, vals)
|
| 1016 |
+
assert res.all()
|
| 1017 |
+
|
| 1018 |
+
vals2 = np.array(vals, dtype=str)
|
| 1019 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1020 |
+
res2 = algos.isin(arr, vals2)
|
| 1021 |
+
assert res2.all()
|
| 1022 |
+
|
| 1023 |
+
def test_isin_dt64tz_with_nat(self):
|
| 1024 |
+
# the all-NaT values used to get inferred to tznaive, which was evaluated
|
| 1025 |
+
# as non-matching GH#56427
|
| 1026 |
+
dti = date_range("2016-01-01", periods=3, tz="UTC")
|
| 1027 |
+
ser = Series(dti)
|
| 1028 |
+
ser[0] = NaT
|
| 1029 |
+
|
| 1030 |
+
res = algos.isin(ser._values, [NaT])
|
| 1031 |
+
exp = np.array([True, False, False], dtype=bool)
|
| 1032 |
+
tm.assert_numpy_array_equal(res, exp)
|
| 1033 |
+
|
| 1034 |
+
def test_categorical_from_codes(self):
|
| 1035 |
+
# GH 16639
|
| 1036 |
+
vals = np.array([0, 1, 2, 0])
|
| 1037 |
+
cats = ["a", "b", "c"]
|
| 1038 |
+
Sd = Series(Categorical([1]).from_codes(vals, cats))
|
| 1039 |
+
St = Series(Categorical([1]).from_codes(np.array([0, 1]), cats))
|
| 1040 |
+
expected = np.array([True, True, False, True])
|
| 1041 |
+
result = algos.isin(Sd, St)
|
| 1042 |
+
tm.assert_numpy_array_equal(expected, result)
|
| 1043 |
+
|
| 1044 |
+
def test_categorical_isin(self):
|
| 1045 |
+
vals = np.array([0, 1, 2, 0])
|
| 1046 |
+
cats = ["a", "b", "c"]
|
| 1047 |
+
cat = Categorical([1]).from_codes(vals, cats)
|
| 1048 |
+
other = Categorical([1]).from_codes(np.array([0, 1]), cats)
|
| 1049 |
+
|
| 1050 |
+
expected = np.array([True, True, False, True])
|
| 1051 |
+
result = algos.isin(cat, other)
|
| 1052 |
+
tm.assert_numpy_array_equal(expected, result)
|
| 1053 |
+
|
| 1054 |
+
def test_same_nan_is_in(self):
|
| 1055 |
+
# GH 22160
|
| 1056 |
+
# nan is special, because from " a is b" doesn't follow "a == b"
|
| 1057 |
+
# at least, isin() should follow python's "np.nan in [nan] == True"
|
| 1058 |
+
# casting to -> np.float64 -> another float-object somewhere on
|
| 1059 |
+
# the way could lead jeopardize this behavior
|
| 1060 |
+
comps = [np.nan] # could be casted to float64
|
| 1061 |
+
values = [np.nan]
|
| 1062 |
+
expected = np.array([True])
|
| 1063 |
+
msg = "isin with argument that is not not a Series"
|
| 1064 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1065 |
+
result = algos.isin(comps, values)
|
| 1066 |
+
tm.assert_numpy_array_equal(expected, result)
|
| 1067 |
+
|
| 1068 |
+
def test_same_nan_is_in_large(self):
|
| 1069 |
+
# https://github.com/pandas-dev/pandas/issues/22205
|
| 1070 |
+
s = np.tile(1.0, 1_000_001)
|
| 1071 |
+
s[0] = np.nan
|
| 1072 |
+
result = algos.isin(s, np.array([np.nan, 1]))
|
| 1073 |
+
expected = np.ones(len(s), dtype=bool)
|
| 1074 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1075 |
+
|
| 1076 |
+
def test_same_nan_is_in_large_series(self):
|
| 1077 |
+
# https://github.com/pandas-dev/pandas/issues/22205
|
| 1078 |
+
s = np.tile(1.0, 1_000_001)
|
| 1079 |
+
series = Series(s)
|
| 1080 |
+
s[0] = np.nan
|
| 1081 |
+
result = series.isin(np.array([np.nan, 1]))
|
| 1082 |
+
expected = Series(np.ones(len(s), dtype=bool))
|
| 1083 |
+
tm.assert_series_equal(result, expected)
|
| 1084 |
+
|
| 1085 |
+
def test_same_object_is_in(self):
|
| 1086 |
+
# GH 22160
|
| 1087 |
+
# there could be special treatment for nans
|
| 1088 |
+
# the user however could define a custom class
|
| 1089 |
+
# with similar behavior, then we at least should
|
| 1090 |
+
# fall back to usual python's behavior: "a in [a] == True"
|
| 1091 |
+
class LikeNan:
|
| 1092 |
+
def __eq__(self, other) -> bool:
|
| 1093 |
+
return False
|
| 1094 |
+
|
| 1095 |
+
def __hash__(self):
|
| 1096 |
+
return 0
|
| 1097 |
+
|
| 1098 |
+
a, b = LikeNan(), LikeNan()
|
| 1099 |
+
|
| 1100 |
+
msg = "isin with argument that is not not a Series"
|
| 1101 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1102 |
+
# same object -> True
|
| 1103 |
+
tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True]))
|
| 1104 |
+
# different objects -> False
|
| 1105 |
+
tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False]))
|
| 1106 |
+
|
| 1107 |
+
def test_different_nans(self):
|
| 1108 |
+
# GH 22160
|
| 1109 |
+
# all nans are handled as equivalent
|
| 1110 |
+
|
| 1111 |
+
comps = [float("nan")]
|
| 1112 |
+
values = [float("nan")]
|
| 1113 |
+
assert comps[0] is not values[0] # different nan-objects
|
| 1114 |
+
|
| 1115 |
+
# as list of python-objects:
|
| 1116 |
+
result = algos.isin(np.array(comps), values)
|
| 1117 |
+
tm.assert_numpy_array_equal(np.array([True]), result)
|
| 1118 |
+
|
| 1119 |
+
# as object-array:
|
| 1120 |
+
result = algos.isin(
|
| 1121 |
+
np.asarray(comps, dtype=object), np.asarray(values, dtype=object)
|
| 1122 |
+
)
|
| 1123 |
+
tm.assert_numpy_array_equal(np.array([True]), result)
|
| 1124 |
+
|
| 1125 |
+
# as float64-array:
|
| 1126 |
+
result = algos.isin(
|
| 1127 |
+
np.asarray(comps, dtype=np.float64), np.asarray(values, dtype=np.float64)
|
| 1128 |
+
)
|
| 1129 |
+
tm.assert_numpy_array_equal(np.array([True]), result)
|
| 1130 |
+
|
| 1131 |
+
def test_no_cast(self):
|
| 1132 |
+
# GH 22160
|
| 1133 |
+
# ensure 42 is not casted to a string
|
| 1134 |
+
comps = ["ss", 42]
|
| 1135 |
+
values = ["42"]
|
| 1136 |
+
expected = np.array([False, False])
|
| 1137 |
+
msg = "isin with argument that is not not a Series, Index"
|
| 1138 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1139 |
+
result = algos.isin(comps, values)
|
| 1140 |
+
tm.assert_numpy_array_equal(expected, result)
|
| 1141 |
+
|
| 1142 |
+
@pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
|
| 1143 |
+
def test_empty(self, empty):
|
| 1144 |
+
# see gh-16991
|
| 1145 |
+
vals = Index(["a", "b"])
|
| 1146 |
+
expected = np.array([False, False])
|
| 1147 |
+
|
| 1148 |
+
result = algos.isin(vals, empty)
|
| 1149 |
+
tm.assert_numpy_array_equal(expected, result)
|
| 1150 |
+
|
| 1151 |
+
def test_different_nan_objects(self):
|
| 1152 |
+
# GH 22119
|
| 1153 |
+
comps = np.array(["nan", np.nan * 1j, float("nan")], dtype=object)
|
| 1154 |
+
vals = np.array([float("nan")], dtype=object)
|
| 1155 |
+
expected = np.array([False, False, True])
|
| 1156 |
+
result = algos.isin(comps, vals)
|
| 1157 |
+
tm.assert_numpy_array_equal(expected, result)
|
| 1158 |
+
|
| 1159 |
+
def test_different_nans_as_float64(self):
|
| 1160 |
+
# GH 21866
|
| 1161 |
+
# create different nans from bit-patterns,
|
| 1162 |
+
# these nans will land in different buckets in the hash-table
|
| 1163 |
+
# if no special care is taken
|
| 1164 |
+
NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
|
| 1165 |
+
NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
|
| 1166 |
+
assert NAN1 != NAN1
|
| 1167 |
+
assert NAN2 != NAN2
|
| 1168 |
+
|
| 1169 |
+
# check that NAN1 and NAN2 are equivalent:
|
| 1170 |
+
arr = np.array([NAN1, NAN2], dtype=np.float64)
|
| 1171 |
+
lookup1 = np.array([NAN1], dtype=np.float64)
|
| 1172 |
+
result = algos.isin(arr, lookup1)
|
| 1173 |
+
expected = np.array([True, True])
|
| 1174 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1175 |
+
|
| 1176 |
+
lookup2 = np.array([NAN2], dtype=np.float64)
|
| 1177 |
+
result = algos.isin(arr, lookup2)
|
| 1178 |
+
expected = np.array([True, True])
|
| 1179 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1180 |
+
|
| 1181 |
+
def test_isin_int_df_string_search(self):
|
| 1182 |
+
"""Comparing df with int`s (1,2) with a string at isin() ("1")
|
| 1183 |
+
-> should not match values because int 1 is not equal str 1"""
|
| 1184 |
+
df = DataFrame({"values": [1, 2]})
|
| 1185 |
+
result = df.isin(["1"])
|
| 1186 |
+
expected_false = DataFrame({"values": [False, False]})
|
| 1187 |
+
tm.assert_frame_equal(result, expected_false)
|
| 1188 |
+
|
| 1189 |
+
def test_isin_nan_df_string_search(self):
|
| 1190 |
+
"""Comparing df with nan value (np.nan,2) with a string at isin() ("NaN")
|
| 1191 |
+
-> should not match values because np.nan is not equal str NaN"""
|
| 1192 |
+
df = DataFrame({"values": [np.nan, 2]})
|
| 1193 |
+
result = df.isin(np.array(["NaN"], dtype=object))
|
| 1194 |
+
expected_false = DataFrame({"values": [False, False]})
|
| 1195 |
+
tm.assert_frame_equal(result, expected_false)
|
| 1196 |
+
|
| 1197 |
+
def test_isin_float_df_string_search(self):
|
| 1198 |
+
"""Comparing df with floats (1.4245,2.32441) with a string at isin() ("1.4245")
|
| 1199 |
+
-> should not match values because float 1.4245 is not equal str 1.4245"""
|
| 1200 |
+
df = DataFrame({"values": [1.4245, 2.32441]})
|
| 1201 |
+
result = df.isin(np.array(["1.4245"], dtype=object))
|
| 1202 |
+
expected_false = DataFrame({"values": [False, False]})
|
| 1203 |
+
tm.assert_frame_equal(result, expected_false)
|
| 1204 |
+
|
| 1205 |
+
def test_isin_unsigned_dtype(self):
|
| 1206 |
+
# GH#46485
|
| 1207 |
+
ser = Series([1378774140726870442], dtype=np.uint64)
|
| 1208 |
+
result = ser.isin([1378774140726870528])
|
| 1209 |
+
expected = Series(False)
|
| 1210 |
+
tm.assert_series_equal(result, expected)
|
| 1211 |
+
|
| 1212 |
+
|
| 1213 |
+
class TestValueCounts:
|
| 1214 |
+
def test_value_counts(self):
|
| 1215 |
+
arr = np.random.default_rng(1234).standard_normal(4)
|
| 1216 |
+
factor = cut(arr, 4)
|
| 1217 |
+
|
| 1218 |
+
# assert isinstance(factor, n)
|
| 1219 |
+
msg = "pandas.value_counts is deprecated"
|
| 1220 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1221 |
+
result = algos.value_counts(factor)
|
| 1222 |
+
breaks = [-1.606, -1.018, -0.431, 0.155, 0.741]
|
| 1223 |
+
index = IntervalIndex.from_breaks(breaks).astype(CategoricalDtype(ordered=True))
|
| 1224 |
+
expected = Series([1, 0, 2, 1], index=index, name="count")
|
| 1225 |
+
tm.assert_series_equal(result.sort_index(), expected.sort_index())
|
| 1226 |
+
|
| 1227 |
+
def test_value_counts_bins(self):
|
| 1228 |
+
s = [1, 2, 3, 4]
|
| 1229 |
+
msg = "pandas.value_counts is deprecated"
|
| 1230 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1231 |
+
result = algos.value_counts(s, bins=1)
|
| 1232 |
+
expected = Series(
|
| 1233 |
+
[4], index=IntervalIndex.from_tuples([(0.996, 4.0)]), name="count"
|
| 1234 |
+
)
|
| 1235 |
+
tm.assert_series_equal(result, expected)
|
| 1236 |
+
|
| 1237 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1238 |
+
result = algos.value_counts(s, bins=2, sort=False)
|
| 1239 |
+
expected = Series(
|
| 1240 |
+
[2, 2],
|
| 1241 |
+
index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]),
|
| 1242 |
+
name="count",
|
| 1243 |
+
)
|
| 1244 |
+
tm.assert_series_equal(result, expected)
|
| 1245 |
+
|
| 1246 |
+
def test_value_counts_dtypes(self):
|
| 1247 |
+
msg2 = "pandas.value_counts is deprecated"
|
| 1248 |
+
with tm.assert_produces_warning(FutureWarning, match=msg2):
|
| 1249 |
+
result = algos.value_counts(np.array([1, 1.0]))
|
| 1250 |
+
assert len(result) == 1
|
| 1251 |
+
|
| 1252 |
+
with tm.assert_produces_warning(FutureWarning, match=msg2):
|
| 1253 |
+
result = algos.value_counts(np.array([1, 1.0]), bins=1)
|
| 1254 |
+
assert len(result) == 1
|
| 1255 |
+
|
| 1256 |
+
with tm.assert_produces_warning(FutureWarning, match=msg2):
|
| 1257 |
+
result = algos.value_counts(Series([1, 1.0, "1"])) # object
|
| 1258 |
+
assert len(result) == 2
|
| 1259 |
+
|
| 1260 |
+
msg = "bins argument only works with numeric data"
|
| 1261 |
+
with pytest.raises(TypeError, match=msg):
|
| 1262 |
+
with tm.assert_produces_warning(FutureWarning, match=msg2):
|
| 1263 |
+
algos.value_counts(np.array(["1", 1], dtype=object), bins=1)
|
| 1264 |
+
|
| 1265 |
+
def test_value_counts_nat(self):
|
| 1266 |
+
td = Series([np.timedelta64(10000), NaT], dtype="timedelta64[ns]")
|
| 1267 |
+
dt = to_datetime(["NaT", "2014-01-01"])
|
| 1268 |
+
|
| 1269 |
+
msg = "pandas.value_counts is deprecated"
|
| 1270 |
+
|
| 1271 |
+
for ser in [td, dt]:
|
| 1272 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1273 |
+
vc = algos.value_counts(ser)
|
| 1274 |
+
vc_with_na = algos.value_counts(ser, dropna=False)
|
| 1275 |
+
assert len(vc) == 1
|
| 1276 |
+
assert len(vc_with_na) == 2
|
| 1277 |
+
|
| 1278 |
+
exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}, name="count")
|
| 1279 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1280 |
+
result_dt = algos.value_counts(dt)
|
| 1281 |
+
tm.assert_series_equal(result_dt, exp_dt)
|
| 1282 |
+
|
| 1283 |
+
exp_td = Series({np.timedelta64(10000): 1}, name="count")
|
| 1284 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1285 |
+
result_td = algos.value_counts(td)
|
| 1286 |
+
tm.assert_series_equal(result_td, exp_td)
|
| 1287 |
+
|
| 1288 |
+
@pytest.mark.parametrize("dtype", [object, "M8[us]"])
|
| 1289 |
+
def test_value_counts_datetime_outofbounds(self, dtype):
|
| 1290 |
+
# GH 13663
|
| 1291 |
+
ser = Series(
|
| 1292 |
+
[
|
| 1293 |
+
datetime(3000, 1, 1),
|
| 1294 |
+
datetime(5000, 1, 1),
|
| 1295 |
+
datetime(5000, 1, 1),
|
| 1296 |
+
datetime(6000, 1, 1),
|
| 1297 |
+
datetime(3000, 1, 1),
|
| 1298 |
+
datetime(3000, 1, 1),
|
| 1299 |
+
],
|
| 1300 |
+
dtype=dtype,
|
| 1301 |
+
)
|
| 1302 |
+
res = ser.value_counts()
|
| 1303 |
+
|
| 1304 |
+
exp_index = Index(
|
| 1305 |
+
[datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)],
|
| 1306 |
+
dtype=dtype,
|
| 1307 |
+
)
|
| 1308 |
+
exp = Series([3, 2, 1], index=exp_index, name="count")
|
| 1309 |
+
tm.assert_series_equal(res, exp)
|
| 1310 |
+
|
| 1311 |
+
def test_categorical(self):
|
| 1312 |
+
s = Series(Categorical(list("aaabbc")))
|
| 1313 |
+
result = s.value_counts()
|
| 1314 |
+
expected = Series(
|
| 1315 |
+
[3, 2, 1], index=CategoricalIndex(["a", "b", "c"]), name="count"
|
| 1316 |
+
)
|
| 1317 |
+
|
| 1318 |
+
tm.assert_series_equal(result, expected, check_index_type=True)
|
| 1319 |
+
|
| 1320 |
+
# preserve order?
|
| 1321 |
+
s = s.cat.as_ordered()
|
| 1322 |
+
result = s.value_counts()
|
| 1323 |
+
expected.index = expected.index.as_ordered()
|
| 1324 |
+
tm.assert_series_equal(result, expected, check_index_type=True)
|
| 1325 |
+
|
| 1326 |
+
def test_categorical_nans(self):
|
| 1327 |
+
s = Series(Categorical(list("aaaaabbbcc"))) # 4,3,2,1 (nan)
|
| 1328 |
+
s.iloc[1] = np.nan
|
| 1329 |
+
result = s.value_counts()
|
| 1330 |
+
expected = Series(
|
| 1331 |
+
[4, 3, 2],
|
| 1332 |
+
index=CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"]),
|
| 1333 |
+
name="count",
|
| 1334 |
+
)
|
| 1335 |
+
tm.assert_series_equal(result, expected, check_index_type=True)
|
| 1336 |
+
result = s.value_counts(dropna=False)
|
| 1337 |
+
expected = Series(
|
| 1338 |
+
[4, 3, 2, 1], index=CategoricalIndex(["a", "b", "c", np.nan]), name="count"
|
| 1339 |
+
)
|
| 1340 |
+
tm.assert_series_equal(result, expected, check_index_type=True)
|
| 1341 |
+
|
| 1342 |
+
# out of order
|
| 1343 |
+
s = Series(
|
| 1344 |
+
Categorical(list("aaaaabbbcc"), ordered=True, categories=["b", "a", "c"])
|
| 1345 |
+
)
|
| 1346 |
+
s.iloc[1] = np.nan
|
| 1347 |
+
result = s.value_counts()
|
| 1348 |
+
expected = Series(
|
| 1349 |
+
[4, 3, 2],
|
| 1350 |
+
index=CategoricalIndex(
|
| 1351 |
+
["a", "b", "c"],
|
| 1352 |
+
categories=["b", "a", "c"],
|
| 1353 |
+
ordered=True,
|
| 1354 |
+
),
|
| 1355 |
+
name="count",
|
| 1356 |
+
)
|
| 1357 |
+
tm.assert_series_equal(result, expected, check_index_type=True)
|
| 1358 |
+
|
| 1359 |
+
result = s.value_counts(dropna=False)
|
| 1360 |
+
expected = Series(
|
| 1361 |
+
[4, 3, 2, 1],
|
| 1362 |
+
index=CategoricalIndex(
|
| 1363 |
+
["a", "b", "c", np.nan], categories=["b", "a", "c"], ordered=True
|
| 1364 |
+
),
|
| 1365 |
+
name="count",
|
| 1366 |
+
)
|
| 1367 |
+
tm.assert_series_equal(result, expected, check_index_type=True)
|
| 1368 |
+
|
| 1369 |
+
def test_categorical_zeroes(self):
|
| 1370 |
+
# keep the `d` category with 0
|
| 1371 |
+
s = Series(Categorical(list("bbbaac"), categories=list("abcd"), ordered=True))
|
| 1372 |
+
result = s.value_counts()
|
| 1373 |
+
expected = Series(
|
| 1374 |
+
[3, 2, 1, 0],
|
| 1375 |
+
index=Categorical(
|
| 1376 |
+
["b", "a", "c", "d"], categories=list("abcd"), ordered=True
|
| 1377 |
+
),
|
| 1378 |
+
name="count",
|
| 1379 |
+
)
|
| 1380 |
+
tm.assert_series_equal(result, expected, check_index_type=True)
|
| 1381 |
+
|
| 1382 |
+
def test_value_counts_dropna(self):
|
| 1383 |
+
# https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328
|
| 1384 |
+
|
| 1385 |
+
tm.assert_series_equal(
|
| 1386 |
+
Series([True, True, False]).value_counts(dropna=True),
|
| 1387 |
+
Series([2, 1], index=[True, False], name="count"),
|
| 1388 |
+
)
|
| 1389 |
+
tm.assert_series_equal(
|
| 1390 |
+
Series([True, True, False]).value_counts(dropna=False),
|
| 1391 |
+
Series([2, 1], index=[True, False], name="count"),
|
| 1392 |
+
)
|
| 1393 |
+
|
| 1394 |
+
tm.assert_series_equal(
|
| 1395 |
+
Series([True] * 3 + [False] * 2 + [None] * 5).value_counts(dropna=True),
|
| 1396 |
+
Series([3, 2], index=Index([True, False], dtype=object), name="count"),
|
| 1397 |
+
)
|
| 1398 |
+
tm.assert_series_equal(
|
| 1399 |
+
Series([True] * 5 + [False] * 3 + [None] * 2).value_counts(dropna=False),
|
| 1400 |
+
Series([5, 3, 2], index=[True, False, None], name="count"),
|
| 1401 |
+
)
|
| 1402 |
+
tm.assert_series_equal(
|
| 1403 |
+
Series([10.3, 5.0, 5.0]).value_counts(dropna=True),
|
| 1404 |
+
Series([2, 1], index=[5.0, 10.3], name="count"),
|
| 1405 |
+
)
|
| 1406 |
+
tm.assert_series_equal(
|
| 1407 |
+
Series([10.3, 5.0, 5.0]).value_counts(dropna=False),
|
| 1408 |
+
Series([2, 1], index=[5.0, 10.3], name="count"),
|
| 1409 |
+
)
|
| 1410 |
+
|
| 1411 |
+
tm.assert_series_equal(
|
| 1412 |
+
Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True),
|
| 1413 |
+
Series([2, 1], index=[5.0, 10.3], name="count"),
|
| 1414 |
+
)
|
| 1415 |
+
|
| 1416 |
+
result = Series([10.3, 10.3, 5.0, 5.0, 5.0, None]).value_counts(dropna=False)
|
| 1417 |
+
expected = Series([3, 2, 1], index=[5.0, 10.3, None], name="count")
|
| 1418 |
+
tm.assert_series_equal(result, expected)
|
| 1419 |
+
|
| 1420 |
+
@pytest.mark.parametrize("dtype", (np.float64, object, "M8[ns]"))
|
| 1421 |
+
def test_value_counts_normalized(self, dtype):
|
| 1422 |
+
# GH12558
|
| 1423 |
+
s = Series([1] * 2 + [2] * 3 + [np.nan] * 5)
|
| 1424 |
+
s_typed = s.astype(dtype)
|
| 1425 |
+
result = s_typed.value_counts(normalize=True, dropna=False)
|
| 1426 |
+
expected = Series(
|
| 1427 |
+
[0.5, 0.3, 0.2],
|
| 1428 |
+
index=Series([np.nan, 2.0, 1.0], dtype=dtype),
|
| 1429 |
+
name="proportion",
|
| 1430 |
+
)
|
| 1431 |
+
tm.assert_series_equal(result, expected)
|
| 1432 |
+
|
| 1433 |
+
result = s_typed.value_counts(normalize=True, dropna=True)
|
| 1434 |
+
expected = Series(
|
| 1435 |
+
[0.6, 0.4], index=Series([2.0, 1.0], dtype=dtype), name="proportion"
|
| 1436 |
+
)
|
| 1437 |
+
tm.assert_series_equal(result, expected)
|
| 1438 |
+
|
| 1439 |
+
def test_value_counts_uint64(self):
|
| 1440 |
+
arr = np.array([2**63], dtype=np.uint64)
|
| 1441 |
+
expected = Series([1], index=[2**63], name="count")
|
| 1442 |
+
msg = "pandas.value_counts is deprecated"
|
| 1443 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1444 |
+
result = algos.value_counts(arr)
|
| 1445 |
+
|
| 1446 |
+
tm.assert_series_equal(result, expected)
|
| 1447 |
+
|
| 1448 |
+
arr = np.array([-1, 2**63], dtype=object)
|
| 1449 |
+
expected = Series([1, 1], index=[-1, 2**63], name="count")
|
| 1450 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1451 |
+
result = algos.value_counts(arr)
|
| 1452 |
+
|
| 1453 |
+
tm.assert_series_equal(result, expected)
|
| 1454 |
+
|
| 1455 |
+
def test_value_counts_series(self):
|
| 1456 |
+
# GH#54857
|
| 1457 |
+
values = np.array([3, 1, 2, 3, 4, np.nan])
|
| 1458 |
+
result = Series(values).value_counts(bins=3)
|
| 1459 |
+
expected = Series(
|
| 1460 |
+
[2, 2, 1],
|
| 1461 |
+
index=IntervalIndex.from_tuples(
|
| 1462 |
+
[(0.996, 2.0), (2.0, 3.0), (3.0, 4.0)], dtype="interval[float64, right]"
|
| 1463 |
+
),
|
| 1464 |
+
name="count",
|
| 1465 |
+
)
|
| 1466 |
+
tm.assert_series_equal(result, expected)
|
| 1467 |
+
|
| 1468 |
+
|
| 1469 |
+
class TestDuplicated:
|
| 1470 |
+
def test_duplicated_with_nas(self):
|
| 1471 |
+
keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)
|
| 1472 |
+
|
| 1473 |
+
result = algos.duplicated(keys)
|
| 1474 |
+
expected = np.array([False, False, False, True, False, True])
|
| 1475 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1476 |
+
|
| 1477 |
+
result = algos.duplicated(keys, keep="first")
|
| 1478 |
+
expected = np.array([False, False, False, True, False, True])
|
| 1479 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1480 |
+
|
| 1481 |
+
result = algos.duplicated(keys, keep="last")
|
| 1482 |
+
expected = np.array([True, False, True, False, False, False])
|
| 1483 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1484 |
+
|
| 1485 |
+
result = algos.duplicated(keys, keep=False)
|
| 1486 |
+
expected = np.array([True, False, True, True, False, True])
|
| 1487 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1488 |
+
|
| 1489 |
+
keys = np.empty(8, dtype=object)
|
| 1490 |
+
for i, t in enumerate(
|
| 1491 |
+
zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2)
|
| 1492 |
+
):
|
| 1493 |
+
keys[i] = t
|
| 1494 |
+
|
| 1495 |
+
result = algos.duplicated(keys)
|
| 1496 |
+
falses = [False] * 4
|
| 1497 |
+
trues = [True] * 4
|
| 1498 |
+
expected = np.array(falses + trues)
|
| 1499 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1500 |
+
|
| 1501 |
+
result = algos.duplicated(keys, keep="last")
|
| 1502 |
+
expected = np.array(trues + falses)
|
| 1503 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1504 |
+
|
| 1505 |
+
result = algos.duplicated(keys, keep=False)
|
| 1506 |
+
expected = np.array(trues + trues)
|
| 1507 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1508 |
+
|
| 1509 |
+
@pytest.mark.parametrize(
|
| 1510 |
+
"case",
|
| 1511 |
+
[
|
| 1512 |
+
np.array([1, 2, 1, 5, 3, 2, 4, 1, 5, 6]),
|
| 1513 |
+
np.array([1.1, 2.2, 1.1, np.nan, 3.3, 2.2, 4.4, 1.1, np.nan, 6.6]),
|
| 1514 |
+
np.array(
|
| 1515 |
+
[
|
| 1516 |
+
1 + 1j,
|
| 1517 |
+
2 + 2j,
|
| 1518 |
+
1 + 1j,
|
| 1519 |
+
5 + 5j,
|
| 1520 |
+
3 + 3j,
|
| 1521 |
+
2 + 2j,
|
| 1522 |
+
4 + 4j,
|
| 1523 |
+
1 + 1j,
|
| 1524 |
+
5 + 5j,
|
| 1525 |
+
6 + 6j,
|
| 1526 |
+
]
|
| 1527 |
+
),
|
| 1528 |
+
np.array(["a", "b", "a", "e", "c", "b", "d", "a", "e", "f"], dtype=object),
|
| 1529 |
+
np.array(
|
| 1530 |
+
[1, 2**63, 1, 3**5, 10, 2**63, 39, 1, 3**5, 7], dtype=np.uint64
|
| 1531 |
+
),
|
| 1532 |
+
],
|
| 1533 |
+
)
|
| 1534 |
+
def test_numeric_object_likes(self, case):
|
| 1535 |
+
exp_first = np.array(
|
| 1536 |
+
[False, False, True, False, False, True, False, True, True, False]
|
| 1537 |
+
)
|
| 1538 |
+
exp_last = np.array(
|
| 1539 |
+
[True, True, True, True, False, False, False, False, False, False]
|
| 1540 |
+
)
|
| 1541 |
+
exp_false = exp_first | exp_last
|
| 1542 |
+
|
| 1543 |
+
res_first = algos.duplicated(case, keep="first")
|
| 1544 |
+
tm.assert_numpy_array_equal(res_first, exp_first)
|
| 1545 |
+
|
| 1546 |
+
res_last = algos.duplicated(case, keep="last")
|
| 1547 |
+
tm.assert_numpy_array_equal(res_last, exp_last)
|
| 1548 |
+
|
| 1549 |
+
res_false = algos.duplicated(case, keep=False)
|
| 1550 |
+
tm.assert_numpy_array_equal(res_false, exp_false)
|
| 1551 |
+
|
| 1552 |
+
# index
|
| 1553 |
+
for idx in [Index(case), Index(case, dtype="category")]:
|
| 1554 |
+
res_first = idx.duplicated(keep="first")
|
| 1555 |
+
tm.assert_numpy_array_equal(res_first, exp_first)
|
| 1556 |
+
|
| 1557 |
+
res_last = idx.duplicated(keep="last")
|
| 1558 |
+
tm.assert_numpy_array_equal(res_last, exp_last)
|
| 1559 |
+
|
| 1560 |
+
res_false = idx.duplicated(keep=False)
|
| 1561 |
+
tm.assert_numpy_array_equal(res_false, exp_false)
|
| 1562 |
+
|
| 1563 |
+
# series
|
| 1564 |
+
for s in [Series(case), Series(case, dtype="category")]:
|
| 1565 |
+
res_first = s.duplicated(keep="first")
|
| 1566 |
+
tm.assert_series_equal(res_first, Series(exp_first))
|
| 1567 |
+
|
| 1568 |
+
res_last = s.duplicated(keep="last")
|
| 1569 |
+
tm.assert_series_equal(res_last, Series(exp_last))
|
| 1570 |
+
|
| 1571 |
+
res_false = s.duplicated(keep=False)
|
| 1572 |
+
tm.assert_series_equal(res_false, Series(exp_false))
|
| 1573 |
+
|
| 1574 |
+
def test_datetime_likes(self):
|
| 1575 |
+
dt = [
|
| 1576 |
+
"2011-01-01",
|
| 1577 |
+
"2011-01-02",
|
| 1578 |
+
"2011-01-01",
|
| 1579 |
+
"NaT",
|
| 1580 |
+
"2011-01-03",
|
| 1581 |
+
"2011-01-02",
|
| 1582 |
+
"2011-01-04",
|
| 1583 |
+
"2011-01-01",
|
| 1584 |
+
"NaT",
|
| 1585 |
+
"2011-01-06",
|
| 1586 |
+
]
|
| 1587 |
+
td = [
|
| 1588 |
+
"1 days",
|
| 1589 |
+
"2 days",
|
| 1590 |
+
"1 days",
|
| 1591 |
+
"NaT",
|
| 1592 |
+
"3 days",
|
| 1593 |
+
"2 days",
|
| 1594 |
+
"4 days",
|
| 1595 |
+
"1 days",
|
| 1596 |
+
"NaT",
|
| 1597 |
+
"6 days",
|
| 1598 |
+
]
|
| 1599 |
+
|
| 1600 |
+
cases = [
|
| 1601 |
+
np.array([Timestamp(d) for d in dt]),
|
| 1602 |
+
np.array([Timestamp(d, tz="US/Eastern") for d in dt]),
|
| 1603 |
+
np.array([Period(d, freq="D") for d in dt]),
|
| 1604 |
+
np.array([np.datetime64(d) for d in dt]),
|
| 1605 |
+
np.array([Timedelta(d) for d in td]),
|
| 1606 |
+
]
|
| 1607 |
+
|
| 1608 |
+
exp_first = np.array(
|
| 1609 |
+
[False, False, True, False, False, True, False, True, True, False]
|
| 1610 |
+
)
|
| 1611 |
+
exp_last = np.array(
|
| 1612 |
+
[True, True, True, True, False, False, False, False, False, False]
|
| 1613 |
+
)
|
| 1614 |
+
exp_false = exp_first | exp_last
|
| 1615 |
+
|
| 1616 |
+
for case in cases:
|
| 1617 |
+
res_first = algos.duplicated(case, keep="first")
|
| 1618 |
+
tm.assert_numpy_array_equal(res_first, exp_first)
|
| 1619 |
+
|
| 1620 |
+
res_last = algos.duplicated(case, keep="last")
|
| 1621 |
+
tm.assert_numpy_array_equal(res_last, exp_last)
|
| 1622 |
+
|
| 1623 |
+
res_false = algos.duplicated(case, keep=False)
|
| 1624 |
+
tm.assert_numpy_array_equal(res_false, exp_false)
|
| 1625 |
+
|
| 1626 |
+
# index
|
| 1627 |
+
for idx in [
|
| 1628 |
+
Index(case),
|
| 1629 |
+
Index(case, dtype="category"),
|
| 1630 |
+
Index(case, dtype=object),
|
| 1631 |
+
]:
|
| 1632 |
+
res_first = idx.duplicated(keep="first")
|
| 1633 |
+
tm.assert_numpy_array_equal(res_first, exp_first)
|
| 1634 |
+
|
| 1635 |
+
res_last = idx.duplicated(keep="last")
|
| 1636 |
+
tm.assert_numpy_array_equal(res_last, exp_last)
|
| 1637 |
+
|
| 1638 |
+
res_false = idx.duplicated(keep=False)
|
| 1639 |
+
tm.assert_numpy_array_equal(res_false, exp_false)
|
| 1640 |
+
|
| 1641 |
+
# series
|
| 1642 |
+
for s in [
|
| 1643 |
+
Series(case),
|
| 1644 |
+
Series(case, dtype="category"),
|
| 1645 |
+
Series(case, dtype=object),
|
| 1646 |
+
]:
|
| 1647 |
+
res_first = s.duplicated(keep="first")
|
| 1648 |
+
tm.assert_series_equal(res_first, Series(exp_first))
|
| 1649 |
+
|
| 1650 |
+
res_last = s.duplicated(keep="last")
|
| 1651 |
+
tm.assert_series_equal(res_last, Series(exp_last))
|
| 1652 |
+
|
| 1653 |
+
res_false = s.duplicated(keep=False)
|
| 1654 |
+
tm.assert_series_equal(res_false, Series(exp_false))
|
| 1655 |
+
|
| 1656 |
+
@pytest.mark.parametrize("case", [Index([1, 2, 3]), pd.RangeIndex(0, 3)])
|
| 1657 |
+
def test_unique_index(self, case):
|
| 1658 |
+
assert case.is_unique is True
|
| 1659 |
+
tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False]))
|
| 1660 |
+
|
| 1661 |
+
@pytest.mark.parametrize(
|
| 1662 |
+
"arr, uniques",
|
| 1663 |
+
[
|
| 1664 |
+
(
|
| 1665 |
+
[(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)],
|
| 1666 |
+
[(0, 0), (0, 1), (1, 0), (1, 1)],
|
| 1667 |
+
),
|
| 1668 |
+
(
|
| 1669 |
+
[("b", "c"), ("a", "b"), ("a", "b"), ("b", "c")],
|
| 1670 |
+
[("b", "c"), ("a", "b")],
|
| 1671 |
+
),
|
| 1672 |
+
([("a", 1), ("b", 2), ("a", 3), ("a", 1)], [("a", 1), ("b", 2), ("a", 3)]),
|
| 1673 |
+
],
|
| 1674 |
+
)
|
| 1675 |
+
def test_unique_tuples(self, arr, uniques):
|
| 1676 |
+
# https://github.com/pandas-dev/pandas/issues/16519
|
| 1677 |
+
expected = np.empty(len(uniques), dtype=object)
|
| 1678 |
+
expected[:] = uniques
|
| 1679 |
+
|
| 1680 |
+
msg = "unique with argument that is not not a Series"
|
| 1681 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1682 |
+
result = pd.unique(arr)
|
| 1683 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1684 |
+
|
| 1685 |
+
@pytest.mark.parametrize(
|
| 1686 |
+
"array,expected",
|
| 1687 |
+
[
|
| 1688 |
+
(
|
| 1689 |
+
[1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j],
|
| 1690 |
+
# Should return a complex dtype in the future
|
| 1691 |
+
np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object),
|
| 1692 |
+
)
|
| 1693 |
+
],
|
| 1694 |
+
)
|
| 1695 |
+
def test_unique_complex_numbers(self, array, expected):
|
| 1696 |
+
# GH 17927
|
| 1697 |
+
msg = "unique with argument that is not not a Series"
|
| 1698 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 1699 |
+
result = pd.unique(array)
|
| 1700 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1701 |
+
|
| 1702 |
+
|
| 1703 |
+
class TestHashTable:
|
| 1704 |
+
@pytest.mark.parametrize(
|
| 1705 |
+
"htable, data",
|
| 1706 |
+
[
|
| 1707 |
+
(ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]),
|
| 1708 |
+
(ht.StringHashTable, [f"foo_{i}" for i in range(1000)]),
|
| 1709 |
+
(ht.Float64HashTable, np.arange(1000, dtype=np.float64)),
|
| 1710 |
+
(ht.Int64HashTable, np.arange(1000, dtype=np.int64)),
|
| 1711 |
+
(ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)),
|
| 1712 |
+
],
|
| 1713 |
+
)
|
| 1714 |
+
def test_hashtable_unique(self, htable, data, writable):
|
| 1715 |
+
# output of maker has guaranteed unique elements
|
| 1716 |
+
s = Series(data)
|
| 1717 |
+
if htable == ht.Float64HashTable:
|
| 1718 |
+
# add NaN for float column
|
| 1719 |
+
s.loc[500] = np.nan
|
| 1720 |
+
elif htable == ht.PyObjectHashTable:
|
| 1721 |
+
# use different NaN types for object column
|
| 1722 |
+
s.loc[500:502] = [np.nan, None, NaT]
|
| 1723 |
+
|
| 1724 |
+
# create duplicated selection
|
| 1725 |
+
s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
|
| 1726 |
+
s_duplicated.values.setflags(write=writable)
|
| 1727 |
+
|
| 1728 |
+
# drop_duplicates has own cython code (hash_table_func_helper.pxi)
|
| 1729 |
+
# and is tested separately; keeps first occurrence like ht.unique()
|
| 1730 |
+
expected_unique = s_duplicated.drop_duplicates(keep="first").values
|
| 1731 |
+
result_unique = htable().unique(s_duplicated.values)
|
| 1732 |
+
tm.assert_numpy_array_equal(result_unique, expected_unique)
|
| 1733 |
+
|
| 1734 |
+
# test return_inverse=True
|
| 1735 |
+
# reconstruction can only succeed if the inverse is correct
|
| 1736 |
+
result_unique, result_inverse = htable().unique(
|
| 1737 |
+
s_duplicated.values, return_inverse=True
|
| 1738 |
+
)
|
| 1739 |
+
tm.assert_numpy_array_equal(result_unique, expected_unique)
|
| 1740 |
+
reconstr = result_unique[result_inverse]
|
| 1741 |
+
tm.assert_numpy_array_equal(reconstr, s_duplicated.values)
|
| 1742 |
+
|
| 1743 |
+
@pytest.mark.parametrize(
|
| 1744 |
+
"htable, data",
|
| 1745 |
+
[
|
| 1746 |
+
(ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]),
|
| 1747 |
+
(ht.StringHashTable, [f"foo_{i}" for i in range(1000)]),
|
| 1748 |
+
(ht.Float64HashTable, np.arange(1000, dtype=np.float64)),
|
| 1749 |
+
(ht.Int64HashTable, np.arange(1000, dtype=np.int64)),
|
| 1750 |
+
(ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)),
|
| 1751 |
+
],
|
| 1752 |
+
)
|
| 1753 |
+
def test_hashtable_factorize(self, htable, writable, data):
|
| 1754 |
+
# output of maker has guaranteed unique elements
|
| 1755 |
+
s = Series(data)
|
| 1756 |
+
if htable == ht.Float64HashTable:
|
| 1757 |
+
# add NaN for float column
|
| 1758 |
+
s.loc[500] = np.nan
|
| 1759 |
+
elif htable == ht.PyObjectHashTable:
|
| 1760 |
+
# use different NaN types for object column
|
| 1761 |
+
s.loc[500:502] = [np.nan, None, NaT]
|
| 1762 |
+
|
| 1763 |
+
# create duplicated selection
|
| 1764 |
+
s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
|
| 1765 |
+
s_duplicated.values.setflags(write=writable)
|
| 1766 |
+
na_mask = s_duplicated.isna().values
|
| 1767 |
+
|
| 1768 |
+
result_unique, result_inverse = htable().factorize(s_duplicated.values)
|
| 1769 |
+
|
| 1770 |
+
# drop_duplicates has own cython code (hash_table_func_helper.pxi)
|
| 1771 |
+
# and is tested separately; keeps first occurrence like ht.factorize()
|
| 1772 |
+
# since factorize removes all NaNs, we do the same here
|
| 1773 |
+
expected_unique = s_duplicated.dropna().drop_duplicates().values
|
| 1774 |
+
tm.assert_numpy_array_equal(result_unique, expected_unique)
|
| 1775 |
+
|
| 1776 |
+
# reconstruction can only succeed if the inverse is correct. Since
|
| 1777 |
+
# factorize removes the NaNs, those have to be excluded here as well
|
| 1778 |
+
result_reconstruct = result_unique[result_inverse[~na_mask]]
|
| 1779 |
+
expected_reconstruct = s_duplicated.dropna().values
|
| 1780 |
+
tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct)
|
| 1781 |
+
|
| 1782 |
+
|
| 1783 |
+
class TestRank:
|
| 1784 |
+
@pytest.mark.parametrize(
|
| 1785 |
+
"arr",
|
| 1786 |
+
[
|
| 1787 |
+
[np.nan, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 3, np.nan],
|
| 1788 |
+
[4.0, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 4.0, np.nan],
|
| 1789 |
+
],
|
| 1790 |
+
)
|
| 1791 |
+
def test_scipy_compat(self, arr):
|
| 1792 |
+
sp_stats = pytest.importorskip("scipy.stats")
|
| 1793 |
+
|
| 1794 |
+
arr = np.array(arr)
|
| 1795 |
+
|
| 1796 |
+
mask = ~np.isfinite(arr)
|
| 1797 |
+
arr = arr.copy()
|
| 1798 |
+
result = libalgos.rank_1d(arr)
|
| 1799 |
+
arr[mask] = np.inf
|
| 1800 |
+
exp = sp_stats.rankdata(arr)
|
| 1801 |
+
exp[mask] = np.nan
|
| 1802 |
+
tm.assert_almost_equal(result, exp)
|
| 1803 |
+
|
| 1804 |
+
@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"])
|
| 1805 |
+
def test_basic(self, writable, dtype):
|
| 1806 |
+
exp = np.array([1, 2], dtype=np.float64)
|
| 1807 |
+
|
| 1808 |
+
data = np.array([1, 100], dtype=dtype)
|
| 1809 |
+
data.setflags(write=writable)
|
| 1810 |
+
ser = Series(data)
|
| 1811 |
+
result = algos.rank(ser)
|
| 1812 |
+
tm.assert_numpy_array_equal(result, exp)
|
| 1813 |
+
|
| 1814 |
+
@pytest.mark.parametrize("dtype", [np.float64, np.uint64])
|
| 1815 |
+
def test_uint64_overflow(self, dtype):
|
| 1816 |
+
exp = np.array([1, 2], dtype=np.float64)
|
| 1817 |
+
|
| 1818 |
+
s = Series([1, 2**63], dtype=dtype)
|
| 1819 |
+
tm.assert_numpy_array_equal(algos.rank(s), exp)
|
| 1820 |
+
|
| 1821 |
+
def test_too_many_ndims(self):
|
| 1822 |
+
arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
|
| 1823 |
+
msg = "Array with ndim > 2 are not supported"
|
| 1824 |
+
|
| 1825 |
+
with pytest.raises(TypeError, match=msg):
|
| 1826 |
+
algos.rank(arr)
|
| 1827 |
+
|
| 1828 |
+
@pytest.mark.single_cpu
|
| 1829 |
+
def test_pct_max_many_rows(self):
|
| 1830 |
+
# GH 18271
|
| 1831 |
+
values = np.arange(2**24 + 1)
|
| 1832 |
+
result = algos.rank(values, pct=True).max()
|
| 1833 |
+
assert result == 1
|
| 1834 |
+
|
| 1835 |
+
values = np.arange(2**25 + 2).reshape(2**24 + 1, 2)
|
| 1836 |
+
result = algos.rank(values, pct=True).max()
|
| 1837 |
+
assert result == 1
|
| 1838 |
+
|
| 1839 |
+
|
| 1840 |
+
class TestMode:
|
| 1841 |
+
def test_no_mode(self):
|
| 1842 |
+
exp = Series([], dtype=np.float64, index=Index([], dtype=int))
|
| 1843 |
+
tm.assert_numpy_array_equal(algos.mode(np.array([])), exp.values)
|
| 1844 |
+
|
| 1845 |
+
@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
|
| 1846 |
+
def test_mode_single(self, dt):
|
| 1847 |
+
# GH 15714
|
| 1848 |
+
exp_single = [1]
|
| 1849 |
+
data_single = [1]
|
| 1850 |
+
|
| 1851 |
+
exp_multi = [1]
|
| 1852 |
+
data_multi = [1, 1]
|
| 1853 |
+
|
| 1854 |
+
ser = Series(data_single, dtype=dt)
|
| 1855 |
+
exp = Series(exp_single, dtype=dt)
|
| 1856 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1857 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1858 |
+
|
| 1859 |
+
ser = Series(data_multi, dtype=dt)
|
| 1860 |
+
exp = Series(exp_multi, dtype=dt)
|
| 1861 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1862 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1863 |
+
|
| 1864 |
+
def test_mode_obj_int(self):
|
| 1865 |
+
exp = Series([1], dtype=int)
|
| 1866 |
+
tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
|
| 1867 |
+
|
| 1868 |
+
exp = Series(["a", "b", "c"], dtype=object)
|
| 1869 |
+
tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
|
| 1870 |
+
|
| 1871 |
+
@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
|
| 1872 |
+
def test_number_mode(self, dt):
|
| 1873 |
+
exp_single = [1]
|
| 1874 |
+
data_single = [1] * 5 + [2] * 3
|
| 1875 |
+
|
| 1876 |
+
exp_multi = [1, 3]
|
| 1877 |
+
data_multi = [1] * 5 + [2] * 3 + [3] * 5
|
| 1878 |
+
|
| 1879 |
+
ser = Series(data_single, dtype=dt)
|
| 1880 |
+
exp = Series(exp_single, dtype=dt)
|
| 1881 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1882 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1883 |
+
|
| 1884 |
+
ser = Series(data_multi, dtype=dt)
|
| 1885 |
+
exp = Series(exp_multi, dtype=dt)
|
| 1886 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1887 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1888 |
+
|
| 1889 |
+
def test_strobj_mode(self):
|
| 1890 |
+
exp = ["b"]
|
| 1891 |
+
data = ["a"] * 2 + ["b"] * 3
|
| 1892 |
+
|
| 1893 |
+
ser = Series(data, dtype="c")
|
| 1894 |
+
exp = Series(exp, dtype="c")
|
| 1895 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1896 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1897 |
+
|
| 1898 |
+
@pytest.mark.parametrize("dt", [str, object])
|
| 1899 |
+
def test_strobj_multi_char(self, dt):
|
| 1900 |
+
exp = ["bar"]
|
| 1901 |
+
data = ["foo"] * 2 + ["bar"] * 3
|
| 1902 |
+
|
| 1903 |
+
ser = Series(data, dtype=dt)
|
| 1904 |
+
exp = Series(exp, dtype=dt)
|
| 1905 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1906 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1907 |
+
|
| 1908 |
+
def test_datelike_mode(self):
|
| 1909 |
+
exp = Series(["1900-05-03", "2011-01-03", "2013-01-02"], dtype="M8[ns]")
|
| 1910 |
+
ser = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]")
|
| 1911 |
+
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
|
| 1912 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1913 |
+
|
| 1914 |
+
exp = Series(["2011-01-03", "2013-01-02"], dtype="M8[ns]")
|
| 1915 |
+
ser = Series(
|
| 1916 |
+
["2011-01-03", "2013-01-02", "1900-05-03", "2011-01-03", "2013-01-02"],
|
| 1917 |
+
dtype="M8[ns]",
|
| 1918 |
+
)
|
| 1919 |
+
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
|
| 1920 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1921 |
+
|
| 1922 |
+
def test_timedelta_mode(self):
|
| 1923 |
+
exp = Series(["-1 days", "0 days", "1 days"], dtype="timedelta64[ns]")
|
| 1924 |
+
ser = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]")
|
| 1925 |
+
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
|
| 1926 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1927 |
+
|
| 1928 |
+
exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]")
|
| 1929 |
+
ser = Series(
|
| 1930 |
+
["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
|
| 1931 |
+
dtype="timedelta64[ns]",
|
| 1932 |
+
)
|
| 1933 |
+
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
|
| 1934 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1935 |
+
|
| 1936 |
+
def test_mixed_dtype(self):
|
| 1937 |
+
exp = Series(["foo"], dtype=object)
|
| 1938 |
+
ser = Series([1, "foo", "foo"])
|
| 1939 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1940 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1941 |
+
|
| 1942 |
+
def test_uint64_overflow(self):
|
| 1943 |
+
exp = Series([2**63], dtype=np.uint64)
|
| 1944 |
+
ser = Series([1, 2**63, 2**63], dtype=np.uint64)
|
| 1945 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1946 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1947 |
+
|
| 1948 |
+
exp = Series([1, 2**63], dtype=np.uint64)
|
| 1949 |
+
ser = Series([1, 2**63], dtype=np.uint64)
|
| 1950 |
+
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
|
| 1951 |
+
tm.assert_series_equal(ser.mode(), exp)
|
| 1952 |
+
|
| 1953 |
+
def test_categorical(self):
|
| 1954 |
+
c = Categorical([1, 2])
|
| 1955 |
+
exp = c
|
| 1956 |
+
res = Series(c).mode()._values
|
| 1957 |
+
tm.assert_categorical_equal(res, exp)
|
| 1958 |
+
|
| 1959 |
+
c = Categorical([1, "a", "a"])
|
| 1960 |
+
exp = Categorical(["a"], categories=[1, "a"])
|
| 1961 |
+
res = Series(c).mode()._values
|
| 1962 |
+
tm.assert_categorical_equal(res, exp)
|
| 1963 |
+
|
| 1964 |
+
c = Categorical([1, 1, 2, 3, 3])
|
| 1965 |
+
exp = Categorical([1, 3], categories=[1, 2, 3])
|
| 1966 |
+
res = Series(c).mode()._values
|
| 1967 |
+
tm.assert_categorical_equal(res, exp)
|
| 1968 |
+
|
| 1969 |
+
def test_index(self):
    """Check ``algos.mode`` when it is handed ``Index`` objects directly."""
    idx = Index([1, 2, 3])
    exp = Series([1, 2, 3], dtype=np.int64)
    tm.assert_numpy_array_equal(algos.mode(idx), exp.values)

    idx = Index([1, "a", "a"])
    exp = Series(["a"], dtype=object)
    tm.assert_numpy_array_equal(algos.mode(idx), exp.values)

    idx = Index([1, 1, 2, 3, 3])
    exp = Series([1, 3], dtype=np.int64)
    tm.assert_numpy_array_equal(algos.mode(idx), exp.values)

    idx = Index(
        ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
        dtype="timedelta64[ns]",
    )
    with pytest.raises(AttributeError, match="TimedeltaIndex"):
        # algos.mode expects Arraylike, does *not* unwrap TimedeltaIndex
        algos.mode(idx)
|
| 1989 |
+
|
| 1990 |
+
def test_ser_mode_with_name(self):
|
| 1991 |
+
# GH 46737
|
| 1992 |
+
ser = Series([1, 1, 3], name="foo")
|
| 1993 |
+
result = ser.mode()
|
| 1994 |
+
expected = Series([1], name="foo")
|
| 1995 |
+
tm.assert_series_equal(result, expected)
|
| 1996 |
+
|
| 1997 |
+
|
| 1998 |
+
class TestDiff:
    """Tests for ``algos.diff``."""

    @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
    def test_diff_datetimelike_nat(self, dtype):
        """Diffing a 2D datetimelike array keeps NaT where either operand is NaT."""
        # NaT - NaT is NaT, not 0
        arr = np.arange(12).astype(np.int64).view(dtype).reshape(3, 4)
        arr[:, 2] = arr.dtype.type("NaT", "ns")
        result = algos.diff(arr, 1, axis=0)

        # Consecutive rows of the 3x4 arange differ by 4.
        expected = np.ones(arr.shape, dtype="timedelta64[ns]") * 4
        expected[:, 2] = np.timedelta64("NaT", "ns")
        expected[0, :] = np.timedelta64("NaT", "ns")

        tm.assert_numpy_array_equal(result, expected)

        # Same data transposed, diffed along the other axis.
        result = algos.diff(arr.T, 1, axis=1)
        tm.assert_numpy_array_equal(result, expected.T)

    def test_diff_ea_axis(self):
        """Diffing a DatetimeArray on ``axis=1`` raises ``ValueError``."""
        dta = date_range("2016-01-01", periods=3, tz="US/Pacific")._data

        msg = "cannot diff DatetimeArray on axis=1"
        with pytest.raises(ValueError, match=msg):
            algos.diff(dta, 1, axis=1)

    @pytest.mark.parametrize("dtype", ["int8", "int16"])
    def test_diff_low_precision_int(self, dtype):
        """Diffing int8/int16 input is expected to give a float32 result."""
        arr = np.array([0, 1, 1, 0, 0], dtype=dtype)
        result = algos.diff(arr, 1)
        expected = np.array([np.nan, 1, 0, -1, 0], dtype="float32")
        tm.assert_numpy_array_equal(result, expected)
|
| 2028 |
+
|
| 2029 |
+
|
| 2030 |
+
@pytest.mark.parametrize("op", [np.array, pd.array])
def test_union_with_duplicates(op):
    """Check ``algos.union_with_duplicates`` for ndarray and pandas-array input."""
    # GH#36289
    lvals = op([3, 1, 3, 4])
    rvals = op([2, 3, 1, 1])
    expected = op([3, 3, 1, 1, 4, 2])

    # The call is the same for both containers; only the comparison differs.
    result = algos.union_with_duplicates(lvals, rvals)
    if isinstance(expected, np.ndarray):
        tm.assert_numpy_array_equal(result, expected)
    else:
        tm.assert_extension_array_equal(result, expected)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_common.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import collections
|
| 2 |
+
from functools import partial
|
| 3 |
+
import string
|
| 4 |
+
import subprocess
|
| 5 |
+
import sys
|
| 6 |
+
import textwrap
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
import pandas as pd
|
| 12 |
+
from pandas import Series
|
| 13 |
+
import pandas._testing as tm
|
| 14 |
+
from pandas.core import ops
|
| 15 |
+
import pandas.core.common as com
|
| 16 |
+
from pandas.util.version import Version
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def test_get_callable_name():
    """Check ``com.get_callable_name`` on functions, partials, callables, non-callables."""
    getname = com.get_callable_name

    def fn(x):
        return x

    lambda_ = lambda x: x
    part1 = partial(fn)
    part2 = partial(part1)

    class somecall:
        def __call__(self):
            # This shouldn't actually get called below; somecall.__init__
            # should.
            raise NotImplementedError

    assert getname(fn) == "fn"
    # Only a truthy (non-empty) name is required for a lambda.
    assert getname(lambda_)
    # Partials, including nested ones, report the wrapped function's name.
    assert getname(part1) == "fn"
    assert getname(part2) == "fn"
    assert getname(somecall()) == "somecall"
    assert getname(1) is None
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def test_any_none():
    """``com.any_none`` is truthy only when at least one argument is ``None``."""
    assert com.any_none(1, 2, 3, None)
    assert not com.any_none(1, 2, 3, 4)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def test_all_not_none():
    """``com.all_not_none`` is truthy only when no argument is ``None``."""
    assert com.all_not_none(1, 2, 3, 4)
    assert not com.all_not_none(1, 2, 3, None)
    assert not com.all_not_none(None, None, None, None)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def test_random_state():
    """Check ``com.random_state`` for each accepted input kind and for bad input."""
    # Check with seed
    state = com.random_state(5)
    assert state.uniform() == np.random.RandomState(5).uniform()

    # Check with random state object
    state2 = np.random.RandomState(10)
    assert com.random_state(state2).uniform() == np.random.RandomState(10).uniform()

    # check with no arg random state
    assert com.random_state() is np.random

    # check array-like
    # GH32503
    state_arr_like = np.random.default_rng(None).integers(
        0, 2**31, size=624, dtype="uint32"
    )
    assert (
        com.random_state(state_arr_like).uniform()
        == np.random.RandomState(state_arr_like).uniform()
    )

    # Check BitGenerators
    # GH32503
    assert (
        com.random_state(np.random.MT19937(3)).uniform()
        == np.random.RandomState(np.random.MT19937(3)).uniform()
    )
    assert (
        com.random_state(np.random.PCG64(11)).uniform()
        == np.random.RandomState(np.random.PCG64(11)).uniform()
    )

    # Error for floats or strings
    msg = (
        "random_state must be an integer, array-like, a BitGenerator, Generator, "
        "a numpy RandomState, or None"
    )
    with pytest.raises(ValueError, match=msg):
        com.random_state("test")

    with pytest.raises(ValueError, match=msg):
        com.random_state(5.5)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
@pytest.mark.parametrize(
    "left, right, expected",
    [
        (Series([1], name="x"), Series([2], name="x"), "x"),
        (Series([1], name="x"), Series([2], name="y"), None),
        (Series([1]), Series([2], name="x"), None),
        (Series([1], name="x"), Series([2]), None),
        (Series([1], name="x"), [2], "x"),
        ([1], Series([2], name="y"), "y"),
        # matching NAs
        (Series([1], name=np.nan), pd.Index([], name=np.nan), np.nan),
        (Series([1], name=np.nan), pd.Index([], name=pd.NaT), None),
        (Series([1], name=pd.NA), pd.Index([], name=pd.NA), pd.NA),
        # tuple name GH#39757
        (
            Series([1], name=np.int64(1)),
            pd.Index([], name=(np.int64(1), np.int64(2))),
            None,
        ),
        (
            Series([1], name=(np.int64(1), np.int64(2))),
            pd.Index([], name=(np.int64(1), np.int64(2))),
            (np.int64(1), np.int64(2)),
        ),
        pytest.param(
            Series([1], name=(np.float64("nan"), np.int64(2))),
            pd.Index([], name=(np.float64("nan"), np.int64(2))),
            (np.float64("nan"), np.int64(2)),
            marks=pytest.mark.xfail(
                reason="Not checking for matching NAs inside tuples."
            ),
        ),
    ],
)
def test_maybe_match_name(left, right, expected):
    """Check the name ``ops.common._maybe_match_name`` picks for an operand pair."""
    res = ops.common._maybe_match_name(left, right)
    # Identity is tried first so NA-like expectations (NaN != NaN) can still match.
    assert res is expected or res == expected
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def test_standardize_mapping():
    """Check what ``com.standardize_mapping`` rejects and what it returns."""
    # No uninitialized defaultdicts
    msg = r"to_dict\(\) only accepts initialized defaultdicts"
    with pytest.raises(TypeError, match=msg):
        com.standardize_mapping(collections.defaultdict)

    # No non-mapping subtypes, instance
    msg = "unsupported type: <class 'list'>"
    with pytest.raises(TypeError, match=msg):
        com.standardize_mapping([])

    # No non-mapping subtypes, class
    with pytest.raises(TypeError, match=msg):
        com.standardize_mapping(list)

    fill = {"bad": "data"}
    assert com.standardize_mapping(fill) == dict

    # Convert instance to type
    assert com.standardize_mapping({}) == dict

    # An initialized defaultdict comes back wrapped in a functools.partial.
    dd = collections.defaultdict(list)
    assert isinstance(com.standardize_mapping(dd), partial)
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def test_git_version():
    """``pd.__git_version__`` must be a 40-character hexadecimal string."""
    # GH 21295
    git_version = pd.__git_version__
    assert len(git_version) == 40
    assert all(c in string.hexdigits for c in git_version)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def test_version_tag():
    """``pd.__version__`` must parse to a ``Version`` comparable with another one.

    Raises
    ------
    ValueError
        If the comparison raises ``TypeError``.
    """
    version = Version(pd.__version__)
    try:
        # Only comparability matters here; the result is deliberately unused.
        version > Version("0.0.1")
    except TypeError as err:
        raise ValueError(
            "No git tags exist, please sync tags between upstream and your repo"
        ) from err
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
@pytest.mark.parametrize(
    "obj", [(obj,) for obj in pd.__dict__.values() if callable(obj)]
)
def test_serializable(obj):
    """Each parametrized ``obj`` must keep its type across a pickle round trip."""
    # GH 35611
    unpickled = tm.round_trip_pickle(obj)
    assert type(obj) is type(unpickled)
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
class TestIsBoolIndexer:
    """Tests for ``com.is_bool_indexer``."""

    def test_non_bool_array_with_na(self):
        """An object array of strings plus NaN is not a boolean indexer."""
        # in particular, this should not raise
        arr = np.array(["A", "B", np.nan], dtype=object)
        assert not com.is_bool_indexer(arr)

    def test_list_subclass(self):
        """A ``list`` subclass is classified by its contents."""
        # GH#42433

        class MyList(list):
            pass

        val = MyList(["a"])

        assert not com.is_bool_indexer(val)

        val = MyList([True])
        assert com.is_bool_indexer(val)

    def test_frozenlist(self):
        """An empty slice of ``index.names`` is not a boolean indexer."""
        # GH#42461
        data = {"col1": [1, 2], "col2": [3, 4]}
        df = pd.DataFrame(data=data)

        # The index has a single name, so this slice is empty.
        frozen = df.index.names[1:]
        assert not com.is_bool_indexer(frozen)

        # Indexing with it must behave like indexing with an empty list.
        result = df[frozen]
        expected = df[[]]
        tm.assert_frame_equal(result, expected)
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
@pytest.mark.parametrize("with_exception", [True, False])
def test_temp_setattr(with_exception):
    """``com.temp_setattr`` must restore the attribute, even after an exception."""
    # GH#45954
    ser = Series(dtype=object)
    ser.name = "first"
    # Raise a ValueError in either case to satisfy pytest.raises
    match = "Inside exception raised" if with_exception else "Outside exception raised"
    with pytest.raises(ValueError, match=match):
        with com.temp_setattr(ser, "name", "second"):
            assert ser.name == "second"
            if with_exception:
                raise ValueError("Inside exception raised")
        raise ValueError("Outside exception raised")
    assert ser.name == "first"
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
@pytest.mark.single_cpu
def test_str_size():
    """Compare ``sys.getsizeof("a")`` here with a subprocess that imports pandas."""
    # GH#21758
    a = "a"
    expected = sys.getsizeof(a)
    pyexe = sys.executable.replace("\\", "/")
    call = [
        pyexe,
        "-c",
        "a='a';import sys;sys.getsizeof(a);import pandas;print(sys.getsizeof(a));",
    ]
    # The size is the last thing printed; take the final token rather than a
    # fixed-width slice so the digit count and line ending do not matter.
    result = subprocess.check_output(call).decode().split()[-1]
    assert int(result) == int(expected)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
@pytest.mark.single_cpu
def test_bz2_missing_import():
    """Run a subprocess with ``bz2`` blocked and expect ``get_bz2_file`` to raise."""
    # Check whether bz2 missing import is handled correctly (issue #53857)
    # The script is indented here for readability and dedented before it runs;
    # the body of its ``with`` statement must stay one level deeper.
    code = """
        import sys
        sys.modules['bz2'] = None
        import pytest
        import pandas as pd
        from pandas.compat import get_bz2_file
        msg = 'bz2 module not available.'
        with pytest.raises(RuntimeError, match=msg):
            get_bz2_file()
    """
    code = textwrap.dedent(code)
    call = [sys.executable, "-c", code]
    subprocess.check_output(call)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_downstream.py
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Testing that we work in the downstream packages
|
| 3 |
+
"""
|
| 4 |
+
import array
|
| 5 |
+
import subprocess
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
from pandas.errors import IntCastingNaNError
|
| 12 |
+
import pandas.util._test_decorators as td
|
| 13 |
+
|
| 14 |
+
import pandas as pd
|
| 15 |
+
from pandas import (
|
| 16 |
+
DataFrame,
|
| 17 |
+
DatetimeIndex,
|
| 18 |
+
Series,
|
| 19 |
+
TimedeltaIndex,
|
| 20 |
+
)
|
| 21 |
+
import pandas._testing as tm
|
| 22 |
+
from pandas.core.arrays import (
|
| 23 |
+
DatetimeArray,
|
| 24 |
+
TimedeltaArray,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@pytest.fixture
def df():
    """Fixture returning a one-column DataFrame with ``A = [1, 2, 3]``."""
    return DataFrame({"A": [1, 2, 3]})
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def test_dask(df):
    """Smoke-test a dask DataFrame built from a pandas one (skipped without dask)."""
    # dask sets "compute.use_numexpr" to False, so catch the current value
    # and ensure to reset it afterwards to avoid impacting other tests
    olduse = pd.get_option("compute.use_numexpr")

    try:
        pytest.importorskip("toolz")
        dd = pytest.importorskip("dask.dataframe")

        ddf = dd.from_pandas(df, npartitions=3)
        assert ddf.A is not None
        assert ddf.compute() is not None
    finally:
        pd.set_option("compute.use_numexpr", olduse)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def test_dask_ufunc():
    """``dask.array.fix`` on a dask Series must match ``np.fix`` on the pandas one."""
    # dask sets "compute.use_numexpr" to False, so catch the current value
    # and ensure to reset it afterwards to avoid impacting other tests
    olduse = pd.get_option("compute.use_numexpr")

    try:
        da = pytest.importorskip("dask.array")
        dd = pytest.importorskip("dask.dataframe")

        s = Series([1.5, 2.3, 3.7, 4.0])
        ds = dd.from_pandas(s, npartitions=2)

        result = da.fix(ds).compute()
        expected = np.fix(s)
        tm.assert_series_equal(result, expected)
    finally:
        pd.set_option("compute.use_numexpr", olduse)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def test_construct_dask_float_array_int_dtype_match_ndarray():
    """Building a Series from a float dask array must follow the ndarray rules."""
    # GH#40110 make sure we treat a float-dtype dask array with the same
    # rules we would for an ndarray
    dd = pytest.importorskip("dask.dataframe")

    arr = np.array([1, 2.5, 3])
    darr = dd.from_array(arr)

    res = Series(darr)
    expected = Series(arr)
    tm.assert_series_equal(res, expected)

    # GH#49599 in 2.0 we raise instead of silently ignoring the dtype
    msg = "Trying to coerce float values to integers"
    with pytest.raises(ValueError, match=msg):
        Series(darr, dtype="i8")

    msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
    arr[2] = np.nan
    with pytest.raises(IntCastingNaNError, match=msg):
        Series(darr, dtype="i8")
    # which is the same as we get with a numpy input
    with pytest.raises(IntCastingNaNError, match=msg):
        Series(arr, dtype="i8")
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def test_xarray(df):
    """``DataFrame.to_xarray`` must return something (skipped without xarray)."""
    pytest.importorskip("xarray")

    assert df.to_xarray() is not None
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def test_xarray_cftimeindex_nearest():
    """``get_indexer(..., method="nearest")`` on an xarray cftime range."""
    # https://github.com/pydata/xarray/issues/3751
    cftime = pytest.importorskip("cftime")
    xarray = pytest.importorskip("xarray")

    times = xarray.cftime_range("0001", periods=2)
    key = cftime.DatetimeGregorian(2000, 1, 1)
    result = times.get_indexer([key], method="nearest")
    # The year-2000 key is expected to resolve to position 1.
    expected = 1
    assert result == expected
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
@pytest.mark.single_cpu
def test_oo_optimizable():
    """``import pandas`` must succeed in a subprocess started with ``-OO``."""
    # GH 21071
    subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"])
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
@pytest.mark.single_cpu
def test_oo_optimized_datetime_index_unpickle():
    """A ``date_range`` result must pickle round-trip in a ``-OO`` subprocess."""
    # GH 42866
    subprocess.check_call(
        [
            sys.executable,
            "-OO",
            "-c",
            (
                "import pandas as pd, pickle; "
                "pickle.loads(pickle.dumps(pd.date_range('2021-01-01', periods=1)))"
            ),
        ]
    )
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def test_statsmodels():
    """Fit a statsmodels OLS formula on a DataFrame (skipped without statsmodels)."""
    smf = pytest.importorskip("statsmodels.formula.api")

    df = DataFrame(
        {"Lottery": range(5), "Literacy": range(5), "Pop1831": range(100, 105)}
    )
    smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=df).fit()
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def test_scikit_learn():
    """Fit and predict with an sklearn SVC (skipped without sklearn)."""
    pytest.importorskip("sklearn")
    from sklearn import (
        datasets,
        svm,
    )

    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.0)
    # Train on everything but the last sample, then predict that sample.
    clf.fit(digits.data[:-1], digits.target[:-1])
    clf.predict(digits.data[-1:])
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def test_seaborn():
    """Draw a seaborn strip plot from a DataFrame (skipped without seaborn)."""
    seaborn = pytest.importorskip("seaborn")
    tips = DataFrame(
        {"day": pd.date_range("2023", freq="D", periods=5), "total_bill": range(5)}
    )
    seaborn.stripplot(x="day", y="total_bill", data=tips)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def test_pandas_datareader():
    """Import ``pandas_datareader``; the test is skipped when it is unavailable."""
    pytest.importorskip("pandas_datareader")
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_pyarrow(df):
    """A DataFrame must survive a round trip through a ``pyarrow.Table``."""
    pyarrow = pytest.importorskip("pyarrow")
    table = pyarrow.Table.from_pandas(df)
    result = table.to_pandas()
    tm.assert_frame_equal(result, df)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def test_yaml_dump(df):
    """A DataFrame must round-trip through ``yaml.dump`` and ``yaml.load``."""
    # GH#42748
    yaml = pytest.importorskip("yaml")

    dumped = yaml.dump(df)

    # Both loaders are only ever given the text produced just above.
    loaded = yaml.load(dumped, Loader=yaml.Loader)
    tm.assert_frame_equal(df, loaded)

    loaded2 = yaml.load(dumped, Loader=yaml.UnsafeLoader)
    tm.assert_frame_equal(df, loaded2)
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
@pytest.mark.single_cpu
def test_missing_required_dependency():
    """Importing pandas with site-packages disabled must fail and name its deps."""
    # GH 23868
    # To ensure proper isolation, we pass these flags
    # -S : disable site-packages
    # -s : disable user site-packages
    # -E : disable PYTHON* env vars, especially PYTHONPATH
    # https://github.com/MacPython/pandas-wheels/pull/50
    import re

    pyexe = sys.executable.replace("\\", "/")

    # We skip this test if pandas is installed as a site package. We first
    # import the package normally and check the path to the module before
    # executing the test which imports pandas with site packages disabled.
    call = [pyexe, "-c", "import pandas;print(pandas.__file__)"]
    output = subprocess.check_output(call).decode()
    if "site-packages" in output:
        pytest.skip("pandas installed as site package")

    # This test will fail if pandas is installed as a site package. The flags
    # prevent pandas being imported and the test will report Failed: DID NOT
    # RAISE <class 'subprocess.CalledProcessError'>
    call = [pyexe, "-sSE", "-c", "import pandas"]

    # ``match`` is applied as a regex, so the interpreter path is escaped in
    # case it contains regex metacharacters.
    msg = (
        rf"Command '\['{re.escape(pyexe)}', '-sSE', '-c', 'import pandas'\]' "
        "returned non-zero exit status 1."
    )

    with pytest.raises(subprocess.CalledProcessError, match=msg) as exc:
        subprocess.check_output(call, stderr=subprocess.STDOUT)

    output = exc.value.stdout.decode()
    for name in ["numpy", "pytz", "dateutil"]:
        assert name in output
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def test_frame_setitem_dask_array_into_new_col():
    """Assign a dask array to new columns, then overwrite one cell via ``.loc``."""
    # GH#47128

    # dask sets "compute.use_numexpr" to False, so catch the current value
    # and ensure to reset it afterwards to avoid impacting other tests
    olduse = pd.get_option("compute.use_numexpr")

    try:
        da = pytest.importorskip("dask.array")

        dda = da.array([1, 2])
        df = DataFrame({"a": ["a", "b"]})
        df["b"] = dda
        df["c"] = dda
        df.loc[[False, True], "b"] = 100
        result = df.loc[[1], :]
        # Only "b" was overwritten; "c" must still hold the dask value.
        expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])
        tm.assert_frame_equal(result, expected)
    finally:
        pd.set_option("compute.use_numexpr", olduse)
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def test_pandas_priority():
|
| 249 |
+
# GH#48347
|
| 250 |
+
|
| 251 |
+
class MyClass:
|
| 252 |
+
__pandas_priority__ = 5000
|
| 253 |
+
|
| 254 |
+
def __radd__(self, other):
|
| 255 |
+
return self
|
| 256 |
+
|
| 257 |
+
left = MyClass()
|
| 258 |
+
right = Series(range(3))
|
| 259 |
+
|
| 260 |
+
assert right.__add__(left) is NotImplemented
|
| 261 |
+
assert right + left is left
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
@pytest.fixture(
    params=[
        "memoryview",
        "array",
        pytest.param("dask", marks=td.skip_if_no("dask.array")),
        pytest.param("xarray", marks=td.skip_if_no("xarray")),
    ]
)
def array_likes(request):
    """
    Fixture giving a numpy array and a parametrized 'data' object, which can
    be a memoryview, array, dask or xarray object created from the numpy array.
    """
    # GH#24539 recognize e.g xarray, dask, ...
    arr = np.array([1, 2, 3], dtype=np.int64)

    name = request.param
    if name == "memoryview":
        data = memoryview(arr)
    elif name == "array":
        data = array.array("i", arr)
    elif name == "dask":
        import dask.array

        data = dask.array.array(arr)
    elif name == "xarray":
        import xarray as xr

        data = xr.DataArray(arr)
    else:
        # Fail clearly instead of hitting an unbound ``data`` below.
        raise ValueError(f"unexpected array_likes param: {name!r}")

    return arr, data
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
def test_from_obscure_array(dtype, array_likes):
    """Datetimelike arrays and indexes built from array-likes must match ndarray."""
    # GH#24539 recognize e.g xarray, dask, ...
    # Note: we dont do this for PeriodArray bc _from_sequence won't accept
    # an array of integers
    # TODO: could check with arraylike of Period objects
    arr, data = array_likes

    cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype]

    depr_msg = f"{cls.__name__}.__init__ is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=depr_msg):
        expected = cls(arr)
    result = cls._from_sequence(data, dtype=dtype)
    tm.assert_extension_array_equal(result, expected)

    if not isinstance(data, memoryview):
        # FIXME(GH#44431) these raise on memoryview and attempted fix
        # fails on py3.10
        func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype]
        result = func(arr).array
        expected = func(data).array
        tm.assert_equal(result, expected)

    # Let's check the Indexes while we're here
    idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype]
    result = idx_cls(arr)
    expected = idx_cls(data)
    tm.assert_index_equal(result, expected)
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def test_dataframe_consortium() -> None:
    """
    Test some basic methods of the dataframe consortium standard.

    Full testing is done at https://github.com/data-apis/dataframe-api-compat,
    this is just to check that the entry point works as expected.
    """
    pytest.importorskip("dataframe_api_compat")
    df_pd = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df = df_pd.__dataframe_consortium_standard__()
    result_1 = df.get_column_names()
    expected_1 = ["a", "b"]
    assert result_1 == expected_1

    ser = Series([1, 2, 3], name="a")
    col = ser.__column_consortium_standard__()
    assert col.name == "a"
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def test_xarray_coerce_unit():
    """``to_datetime(..., unit="ns")`` accepts an xarray ``DataArray`` (GH44053)."""
    xr = pytest.importorskip("xarray")

    nanosecond_stamps = [
        "1970-01-01 00:00:00.000000001",
        "1970-01-01 00:00:00.000000002",
        "1970-01-01 00:00:00.000000003",
    ]
    expected = DatetimeIndex(nanosecond_stamps, dtype="datetime64[ns]", freq=None)

    result = pd.to_datetime(xr.DataArray([1, 2, 3]), unit="ns")
    tm.assert_index_equal(result, expected)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_errors.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas.errors import (
|
| 4 |
+
AbstractMethodError,
|
| 5 |
+
UndefinedVariableError,
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.mark.parametrize(
    "exc",
    [
        "AttributeConflictWarning",
        "CSSWarning",
        "CategoricalConversionWarning",
        "ClosedFileError",
        "DataError",
        "DatabaseError",
        "DtypeWarning",
        "EmptyDataError",
        "IncompatibilityWarning",
        "IndexingError",
        "InvalidColumnName",
        "InvalidComparison",
        "InvalidVersion",
        "LossySetitemError",
        "MergeError",
        "NoBufferPresent",
        "NumExprClobberingError",
        "NumbaUtilError",
        "OptionError",
        "OutOfBoundsDatetime",
        "ParserError",
        "ParserWarning",
        "PerformanceWarning",
        "PossibleDataLossError",
        "PossiblePrecisionLoss",
        "PyperclipException",
        "SettingWithCopyError",
        "SettingWithCopyWarning",
        "SpecificationError",
        "UnsortedIndexError",
        "UnsupportedFunctionCall",
        "ValueLabelTypeMismatch",
    ],
)
def test_exception_importable(exc):
    """Each listed name resolves on ``pandas.errors`` and can be raised bare."""
    from pandas import errors

    exc_class = getattr(errors, exc)
    assert exc_class is not None

    # check that we can raise on them; the pattern only matches an empty
    # message because the class is instantiated without arguments
    with pytest.raises(exc_class, match="^$"):
        raise exc_class()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def test_catch_oob():
    """Casting the year-1500 Timestamp to ``ns`` raises ``OutOfBoundsDatetime``."""
    from pandas.errors import OutOfBoundsDatetime

    expected_msg = "Cannot cast 1500-01-01 00:00:00 to unit='ns' without overflow"
    with pytest.raises(OutOfBoundsDatetime, match=expected_msg):
        pd.Timestamp("15000101").as_unit("ns")
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
@pytest.mark.parametrize("is_local", [True, False])
def test_catch_undefined_variable_error(is_local):
    """The error message depends on whether the variable is flagged as local."""
    variable_name = "x"
    msg = (
        f"local variable '{variable_name}' is not defined"
        if is_local
        else f"name '{variable_name}' is not defined"
    )

    with pytest.raises(UndefinedVariableError, match=msg):
        raise UndefinedVariableError(variable_name, is_local)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class Foo:
    """Helper class whose members all raise ``AbstractMethodError``."""

    # NOTE: member order matters. Each decorator is resolved when its line
    # runs, so ``@property`` must still refer to the builtin at that point.

    @classmethod
    def classmethod(cls):
        """Raise with ``methodtype="classmethod"``, passing the class itself."""
        raise AbstractMethodError(cls, methodtype="classmethod")

    @property
    def property(self):
        """Raise with ``methodtype="property"``, passing the instance."""
        raise AbstractMethodError(self, methodtype="property")

    def method(self):
        """Raise without an explicit ``methodtype``."""
        raise AbstractMethodError(self)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def test_AbstractMethodError_classmethod():
    """Each kind of ``Foo`` member reports its own method type in the message."""
    # (expected message, zero-argument callable that triggers the error)
    cases = [
        (
            "This classmethod must be defined in the concrete class Foo",
            lambda: Foo.classmethod(),
        ),
        (
            "This property must be defined in the concrete class Foo",
            lambda: Foo().property,
        ),
        (
            "This method must be defined in the concrete class Foo",
            lambda: Foo().method(),
        ),
    ]

    for xpr, trigger in cases:
        with pytest.raises(AbstractMethodError, match=xpr):
            trigger()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_expressions.py
ADDED
|
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import operator
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
from pandas import option_context
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
from pandas.core.api import (
|
| 10 |
+
DataFrame,
|
| 11 |
+
Index,
|
| 12 |
+
Series,
|
| 13 |
+
)
|
| 14 |
+
from pandas.core.computation import expressions as expr
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@pytest.fixture
def _frame():
    """float64 frame of shape (10001, 4), standard-normal draws from seed 2."""
    values = np.random.default_rng(2).standard_normal((10001, 4))
    return DataFrame(values, columns=list("ABCD"), dtype="float64")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@pytest.fixture
def _frame2():
    """float64 frame of shape (100, 4), standard-normal draws from seed 2."""
    values = np.random.default_rng(2).standard_normal((100, 4))
    return DataFrame(values, columns=list("ABCD"), dtype="float64")
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@pytest.fixture
def _mixed(_frame):
    """Mixed-dtype frame derived from ``_frame``: A copied, B/C/D recast."""
    recast = {"B": "float32", "C": "int64", "D": "int32"}

    columns = {"A": _frame["A"].copy()}
    for name, dtype in recast.items():
        columns[name] = _frame[name].astype(dtype)
    return DataFrame(columns)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@pytest.fixture
def _mixed2(_frame2):
    """Mixed-dtype frame derived from ``_frame2``: A copied, B/C/D recast."""
    recast = {"B": "float32", "C": "int64", "D": "int32"}

    columns = {"A": _frame2["A"].copy()}
    for name, dtype in recast.items():
        columns[name] = _frame2[name].astype(dtype)
    return DataFrame(columns)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@pytest.fixture
def _integer():
    """int64 frame of shape (10001, 4) drawn via ``integers(1, 100)`` from seed 2."""
    values = np.random.default_rng(2).integers(1, 100, size=(10001, 4))
    return DataFrame(values, columns=list("ABCD"), dtype="int64")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@pytest.fixture
def _integer_integers(_integer):
    """``_integer`` multiplied element-wise by ``integers(0, 2)`` draws."""
    # integers to get a case with zeros
    multiplier = np.random.default_rng(2).integers(0, 2, size=np.shape(_integer))
    return _integer * multiplier
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
@pytest.fixture
def _integer2():
    """int64 frame of shape (101, 4) drawn via ``integers(1, 100)`` from seed 2."""
    values = np.random.default_rng(2).integers(1, 100, size=(101, 4))
    return DataFrame(values, columns=list("ABCD"), dtype="int64")
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
@pytest.fixture
def _array(_frame):
    """Independent copy of the values of column ``A`` of ``_frame``."""
    column_values = _frame["A"].values
    return column_values.copy()
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
@pytest.fixture
def _array2(_frame2):
    """Independent copy of the values of column ``A`` of ``_frame2``."""
    column_values = _frame2["A"].values
    return column_values.copy()
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
@pytest.fixture
def _array_mixed(_mixed):
    """Independent copy of the values of column ``D`` of ``_mixed``."""
    column_values = _mixed["D"].values
    return column_values.copy()
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
@pytest.fixture
def _array_mixed2(_mixed2):
    """Independent copy of the values of column ``D`` of ``_mixed2``."""
    column_values = _mixed2["D"].values
    return column_values.copy()
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
@pytest.mark.skipif(not expr.USE_NUMEXPR, reason="not using numexpr")
class TestExpressions:
    """Compare results obtained with and without ``compute.use_numexpr``."""

    @staticmethod
    def call_op(df, other, flex: bool, opname: str):
        """
        Apply ``opname`` to ``df`` and ``other`` twice and return both outcomes.

        With ``flex`` set, the operation is looked up as a method on the left
        operand; otherwise it is taken from the ``operator`` module. The
        first evaluation runs with ``compute.use_numexpr`` switched off, the
        second with the option left untouched.

        Returns
        -------
        tuple
            ``(result, expected)``.
        """
        if flex:

            def op(x, y):
                return getattr(x, opname)(y)

            op.__name__ = opname
        else:
            op = getattr(operator, opname)

        with option_context("compute.use_numexpr", False):
            expected = op(df, other)

        # Return value is discarded on purpose.
        # NOTE(review): presumably this clears previously recorded test
        # results -- confirm against ``expressions.get_test_result``.
        expr.get_test_result()

        return op(df, other), expected

    @pytest.mark.parametrize(
        "fixture",
        [
            "_integer",
            "_integer2",
            "_integer_integers",
            "_frame",
            "_frame2",
            "_mixed",
            "_mixed2",
        ],
    )
    @pytest.mark.parametrize("flex", [True, False])
    @pytest.mark.parametrize(
        "arith", ["add", "sub", "mul", "mod", "truediv", "floordiv"]
    )
    def test_run_arithmetic(self, request, fixture, flex, arith, monkeypatch):
        """Arithmetic on a frame, and on each column, matches in both modes."""
        df = request.getfixturevalue(fixture)
        is_truediv = arith == "truediv"

        with monkeypatch.context() as m:
            m.setattr(expr, "_MIN_ELEMENTS", 0)

            # whole frame against itself
            result, expected = self.call_op(df, df, flex, arith)
            if is_truediv:
                assert all(x.kind == "f" for x in expected.dtypes.values)
            tm.assert_equal(expected, result)

            # every column against itself
            for pos in range(len(df.columns)):
                result, expected = self.call_op(
                    df.iloc[:, pos], df.iloc[:, pos], flex, arith
                )
                if is_truediv:
                    assert expected.dtype.kind == "f"
                tm.assert_equal(expected, result)

    @pytest.mark.parametrize(
        "fixture",
        [
            "_integer",
            "_integer2",
            "_integer_integers",
            "_frame",
            "_frame2",
            "_mixed",
            "_mixed2",
        ],
    )
    @pytest.mark.parametrize("flex", [True, False])
    def test_run_binary(self, request, fixture, flex, comparison_op, monkeypatch):
        """
        Only checks that the outcome does not depend on numexpr being
        enabled; whether the operation itself is correct has to be tested
        elsewhere.
        """
        df = request.getfixturevalue(fixture)
        arith = comparison_op.__name__

        with option_context("compute.use_numexpr", False):
            other = df.copy() + 1

        with monkeypatch.context() as m:
            m.setattr(expr, "_MIN_ELEMENTS", 0)
            expr.set_test_mode(True)

            result, expected = self.call_op(df, other, flex, arith)

            assert expr.get_test_result(), "Did not use numexpr as expected."
            tm.assert_equal(expected, result)

            # column-wise "add"; the outcome is not asserted here
            for pos in range(len(df.columns)):
                shifted = other.iloc[:, pos] + 1
                self.call_op(df.iloc[:, pos], shifted, flex, "add")

    def test_invalid(self):
        """``_can_use_numexpr`` rejects a missing op string and a small first operand."""
        large = np.random.default_rng(2).standard_normal(1_000_001)
        small = np.random.default_rng(2).standard_normal(100)

        # no op
        assert not expr._can_use_numexpr(operator.add, None, large, large, "evaluate")

        # min elements
        assert not expr._can_use_numexpr(operator.add, "+", small, small, "evaluate")

        # ok, we only check on first part of expression
        assert expr._can_use_numexpr(operator.add, "+", large, small, "evaluate")

    @pytest.mark.filterwarnings("ignore:invalid value encountered in:RuntimeWarning")
    @pytest.mark.parametrize(
        "opname,op_str",
        [("add", "+"), ("sub", "-"), ("mul", "*"), ("truediv", "/"), ("pow", "**")],
    )
    @pytest.mark.parametrize(
        "left_fix,right_fix", [("_array", "_array2"), ("_array_mixed", "_array_mixed2")]
    )
    def test_binary_ops(self, request, opname, op_str, left_fix, right_fix):
        """``expr.evaluate`` agrees with and without numexpr for binary operators."""
        left = request.getfixturevalue(left_fix)
        right = request.getfixturevalue(right_fix)

        op = getattr(operator, opname)
        # "pow" is evaluated on the absolute values of the left array
        operand = np.abs(left) if opname == "pow" else left

        def check():
            # array has 0s
            with_numexpr = expr.evaluate(op, operand, operand, use_numexpr=True)
            without_numexpr = expr.evaluate(op, operand, operand, use_numexpr=False)
            tm.assert_numpy_array_equal(with_numexpr, without_numexpr)

            assert not expr._can_use_numexpr(op, op_str, right, right, "evaluate")

        with option_context("compute.use_numexpr", False):
            check()

        expr.set_numexpr_threads(1)
        check()
        expr.set_numexpr_threads()
        check()

    @pytest.mark.parametrize(
        "left_fix,right_fix", [("_array", "_array2"), ("_array_mixed", "_array_mixed2")]
    )
    def test_comparison_ops(self, request, comparison_op, left_fix, right_fix):
        """``expr.evaluate`` agrees with and without numexpr for comparisons."""
        left = request.getfixturevalue(left_fix)
        right = request.getfixturevalue(right_fix)
        op = comparison_op

        def check():
            left_plus_one = left + 1
            right_plus_one = right + 1

            with_numexpr = expr.evaluate(op, left, left_plus_one, use_numexpr=True)
            without_numexpr = expr.evaluate(op, left, left_plus_one, use_numexpr=False)
            tm.assert_numpy_array_equal(with_numexpr, without_numexpr)

            # the callable itself is passed in the op-string position
            assert not expr._can_use_numexpr(op, op, right, right_plus_one, "evaluate")

        with option_context("compute.use_numexpr", False):
            check()

        expr.set_numexpr_threads(1)
        check()
        expr.set_numexpr_threads()
        check()

    @pytest.mark.parametrize("cond", [True, False])
    @pytest.mark.parametrize("fixture", ["_frame", "_frame2", "_mixed", "_mixed2"])
    def test_where(self, request, cond, fixture):
        """``expr.where`` matches ``np.where`` for a constant boolean mask."""
        df = request.getfixturevalue(fixture)

        def check():
            mask = np.full(df.shape, cond, dtype=np.bool_)
            result = expr.where(mask, df.values, df.values + 1)
            expected = np.where(mask, df.values, df.values + 1)
            tm.assert_numpy_array_equal(result, expected)

        with option_context("compute.use_numexpr", False):
            check()

        expr.set_numexpr_threads(1)
        check()
        expr.set_numexpr_threads()
        check()

    @pytest.mark.parametrize(
        "op_str,opname", [("/", "truediv"), ("//", "floordiv"), ("**", "pow")]
    )
    def test_bool_ops_raise_on_arithmetic(self, op_str, opname):
        """Division-like and power operators raise on bool operands."""
        df = DataFrame(
            {
                "a": np.random.default_rng(2).random(10) > 0.5,
                "b": np.random.default_rng(2).random(10) > 0.5,
            }
        )

        f = getattr(operator, opname)
        err_msg = re.escape(f"operator '{opname}' not implemented for bool dtypes")

        operand_pairs = [
            (df, df),
            (df.a, df.b),
            (df.a, True),
            (False, df.a),
            (False, df),
            (df, True),
        ]
        for lhs, rhs in operand_pairs:
            with pytest.raises(NotImplementedError, match=err_msg):
                f(lhs, rhs)

    @pytest.mark.parametrize(
        "op_str,opname", [("+", "add"), ("*", "mul"), ("-", "sub")]
    )
    def test_bool_ops_warn_on_arithmetic(self, op_str, opname):
        """``+``/``*`` on bools warn and equal the matching logical operator."""
        if op_str == "-":
            # raises TypeError
            return

        n = 10
        df = DataFrame(
            {
                "a": np.random.default_rng(2).random(n) > 0.5,
                "b": np.random.default_rng(2).random(n) > 0.5,
            }
        )

        subs = {"+": "|", "*": "&", "-": "^"}
        sub_funcs = {"|": "or_", "&": "and_", "^": "xor"}

        f = getattr(operator, opname)
        fe = getattr(operator, sub_funcs[subs[op_str]])

        # (left operand, right operand, equality assertion to apply)
        cases = [
            (df, df, tm.assert_frame_equal),
            (df.a, df.b, tm.assert_series_equal),
            (df.a, True, tm.assert_series_equal),
            (False, df.a, tm.assert_series_equal),
            (False, df, tm.assert_frame_equal),
            (df, True, tm.assert_frame_equal),
        ]

        with tm.use_numexpr(True, min_elements=5):
            for lhs, rhs, assert_equal in cases:
                with tm.assert_produces_warning():
                    r = f(lhs, rhs)
                    e = fe(lhs, rhs)
                    assert_equal(r, e)

    @pytest.mark.parametrize(
        "test_input,expected",
        [
            (
                DataFrame(
                    [[0, 1, 2, "aa"], [0, 1, 2, "aa"]], columns=["a", "b", "c", "dtype"]
                ),
                DataFrame([[False, False], [False, False]], columns=["a", "dtype"]),
            ),
            (
                DataFrame(
                    [[0, 3, 2, "aa"], [0, 4, 2, "aa"], [0, 1, 1, "bb"]],
                    columns=["a", "b", "c", "dtype"],
                ),
                DataFrame(
                    [[False, False], [False, False], [False, False]],
                    columns=["a", "dtype"],
                ),
            ),
        ],
    )
    def test_bool_ops_column_name_dtype(self, test_input, expected):
        """``.ne`` works when a column is literally named ``dtype`` (GH 22383)."""
        # GH 22383 - .ne fails if columns containing column name 'dtype'
        selection = ["a", "dtype"]
        result = test_input.loc[:, selection].ne(test_input.loc[:, selection])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "arith", ("add", "sub", "mul", "mod", "truediv", "floordiv")
    )
    @pytest.mark.parametrize("axis", (0, 1))
    def test_frame_series_axis(self, axis, arith, _frame, monkeypatch):
        """Frame-with-Series arithmetic along either axis matches in both modes."""
        # GH#26736 Dataframe.floordiv(Series, axis=1) fails

        df = _frame
        # first row for axis=1, otherwise first column
        other = df.iloc[0, :] if axis == 1 else df.iloc[:, 0]

        with monkeypatch.context() as m:
            m.setattr(expr, "_MIN_ELEMENTS", 0)

            op_func = getattr(df, arith)

            with option_context("compute.use_numexpr", False):
                expected = op_func(other, axis=axis)

            result = op_func(other, axis=axis)
            tm.assert_frame_equal(expected, result)

    @pytest.mark.parametrize(
        "op",
        [
            "__mod__",
            "__rmod__",
            "__floordiv__",
            "__rfloordiv__",
        ],
    )
    @pytest.mark.parametrize("box", [DataFrame, Series, Index])
    @pytest.mark.parametrize("scalar", [-5, 5])
    def test_python_semantics_with_numexpr_installed(
        self, op, box, scalar, monkeypatch
    ):
        """Mod/floordiv results follow Python integer semantics element by element."""
        # https://github.com/pandas-dev/pandas/issues/36047
        with monkeypatch.context() as m:
            m.setattr(expr, "_MIN_ELEMENTS", 0)

            data = np.arange(-50, 50)
            method = getattr(box(data), op)
            result = method(scalar)

            # compare result with numpy
            with option_context("compute.use_numexpr", False):
                expected = method(scalar)

            tm.assert_equal(result, expected)

            # compare result element-wise with Python
            for i, elem in enumerate(data):
                scalar_result = result.iloc[i, 0] if box is DataFrame else result[i]
                try:
                    expected = getattr(int(elem), op)(scalar)
                except ZeroDivisionError:
                    # no Python reference value for this element
                    continue
                assert scalar_result == expected
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_flags.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class TestFlags:
|
| 7 |
+
def test_equality(self):
|
| 8 |
+
a = pd.DataFrame().set_flags(allows_duplicate_labels=True).flags
|
| 9 |
+
b = pd.DataFrame().set_flags(allows_duplicate_labels=False).flags
|
| 10 |
+
|
| 11 |
+
assert a == a
|
| 12 |
+
assert b == b
|
| 13 |
+
assert a != b
|
| 14 |
+
assert a != 2
|
| 15 |
+
|
| 16 |
+
def test_set(self):
|
| 17 |
+
df = pd.DataFrame().set_flags(allows_duplicate_labels=True)
|
| 18 |
+
a = df.flags
|
| 19 |
+
a.allows_duplicate_labels = False
|
| 20 |
+
assert a.allows_duplicate_labels is False
|
| 21 |
+
a["allows_duplicate_labels"] = True
|
| 22 |
+
assert a.allows_duplicate_labels is True
|
| 23 |
+
|
| 24 |
+
def test_repr(self):
|
| 25 |
+
a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=True).flags)
|
| 26 |
+
assert a == "<Flags(allows_duplicate_labels=True)>"
|
| 27 |
+
a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=False).flags)
|
| 28 |
+
assert a == "<Flags(allows_duplicate_labels=False)>"
|
| 29 |
+
|
| 30 |
+
def test_obj_ref(self):
|
| 31 |
+
df = pd.DataFrame()
|
| 32 |
+
flags = df.flags
|
| 33 |
+
del df
|
| 34 |
+
with pytest.raises(ValueError, match="object has been deleted"):
|
| 35 |
+
flags.allows_duplicate_labels = True
|
| 36 |
+
|
| 37 |
+
def test_getitem(self):
|
| 38 |
+
df = pd.DataFrame()
|
| 39 |
+
flags = df.flags
|
| 40 |
+
assert flags["allows_duplicate_labels"] is True
|
| 41 |
+
flags["allows_duplicate_labels"] = False
|
| 42 |
+
assert flags["allows_duplicate_labels"] is False
|
| 43 |
+
|
| 44 |
+
with pytest.raises(KeyError, match="a"):
|
| 45 |
+
flags["a"]
|
| 46 |
+
|
| 47 |
+
with pytest.raises(ValueError, match="a"):
|
| 48 |
+
flags["a"] = 10
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_multilevel.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import (
|
| 8 |
+
DataFrame,
|
| 9 |
+
MultiIndex,
|
| 10 |
+
Series,
|
| 11 |
+
)
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class TestMultiLevel:
|
| 16 |
+
def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data):
|
| 17 |
+
# axis=0
|
| 18 |
+
ymd = multiindex_year_month_day_dataframe_random_data
|
| 19 |
+
|
| 20 |
+
month_sums = ymd.groupby("month").sum()
|
| 21 |
+
result = month_sums.reindex(ymd.index, level=1)
|
| 22 |
+
expected = ymd.groupby(level="month").transform("sum")
|
| 23 |
+
|
| 24 |
+
tm.assert_frame_equal(result, expected)
|
| 25 |
+
|
| 26 |
+
# Series
|
| 27 |
+
result = month_sums["A"].reindex(ymd.index, level=1)
|
| 28 |
+
expected = ymd["A"].groupby(level="month").transform("sum")
|
| 29 |
+
tm.assert_series_equal(result, expected, check_names=False)
|
| 30 |
+
|
| 31 |
+
# axis=1
|
| 32 |
+
msg = "DataFrame.groupby with axis=1 is deprecated"
|
| 33 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 34 |
+
gb = ymd.T.groupby("month", axis=1)
|
| 35 |
+
|
| 36 |
+
month_sums = gb.sum()
|
| 37 |
+
result = month_sums.reindex(columns=ymd.index, level=1)
|
| 38 |
+
expected = ymd.groupby(level="month").transform("sum").T
|
| 39 |
+
tm.assert_frame_equal(result, expected)
|
| 40 |
+
|
| 41 |
+
def test_reindex(self, multiindex_dataframe_random_data):
|
| 42 |
+
frame = multiindex_dataframe_random_data
|
| 43 |
+
|
| 44 |
+
expected = frame.iloc[[0, 3]]
|
| 45 |
+
reindexed = frame.loc[[("foo", "one"), ("bar", "one")]]
|
| 46 |
+
tm.assert_frame_equal(reindexed, expected)
|
| 47 |
+
|
| 48 |
+
def test_reindex_preserve_levels(
|
| 49 |
+
self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write
|
| 50 |
+
):
|
| 51 |
+
ymd = multiindex_year_month_day_dataframe_random_data
|
| 52 |
+
|
| 53 |
+
new_index = ymd.index[::10]
|
| 54 |
+
chunk = ymd.reindex(new_index)
|
| 55 |
+
if using_copy_on_write:
|
| 56 |
+
assert chunk.index.is_(new_index)
|
| 57 |
+
else:
|
| 58 |
+
assert chunk.index is new_index
|
| 59 |
+
|
| 60 |
+
chunk = ymd.loc[new_index]
|
| 61 |
+
assert chunk.index.equals(new_index)
|
| 62 |
+
|
| 63 |
+
ymdT = ymd.T
|
| 64 |
+
chunk = ymdT.reindex(columns=new_index)
|
| 65 |
+
if using_copy_on_write:
|
| 66 |
+
assert chunk.columns.is_(new_index)
|
| 67 |
+
else:
|
| 68 |
+
assert chunk.columns is new_index
|
| 69 |
+
|
| 70 |
+
chunk = ymdT.loc[:, new_index]
|
| 71 |
+
assert chunk.columns.equals(new_index)
|
| 72 |
+
|
| 73 |
+
def test_groupby_transform(self, multiindex_dataframe_random_data):
|
| 74 |
+
frame = multiindex_dataframe_random_data
|
| 75 |
+
|
| 76 |
+
s = frame["A"]
|
| 77 |
+
grouper = s.index.get_level_values(0)
|
| 78 |
+
|
| 79 |
+
grouped = s.groupby(grouper, group_keys=False)
|
| 80 |
+
|
| 81 |
+
applied = grouped.apply(lambda x: x * 2)
|
| 82 |
+
expected = grouped.transform(lambda x: x * 2)
|
| 83 |
+
result = applied.reindex(expected.index)
|
| 84 |
+
tm.assert_series_equal(result, expected, check_names=False)
|
| 85 |
+
|
| 86 |
+
def test_groupby_corner(self):
|
| 87 |
+
midx = MultiIndex(
|
| 88 |
+
levels=[["foo"], ["bar"], ["baz"]],
|
| 89 |
+
codes=[[0], [0], [0]],
|
| 90 |
+
names=["one", "two", "three"],
|
| 91 |
+
)
|
| 92 |
+
df = DataFrame(
|
| 93 |
+
[np.random.default_rng(2).random(4)],
|
| 94 |
+
columns=["a", "b", "c", "d"],
|
| 95 |
+
index=midx,
|
| 96 |
+
)
|
| 97 |
+
# should work
|
| 98 |
+
df.groupby(level="three")
|
| 99 |
+
|
| 100 |
+
def test_groupby_level_no_obs(self):
|
| 101 |
+
# #1697
|
| 102 |
+
midx = MultiIndex.from_tuples(
|
| 103 |
+
[
|
| 104 |
+
("f1", "s1"),
|
| 105 |
+
("f1", "s2"),
|
| 106 |
+
("f2", "s1"),
|
| 107 |
+
("f2", "s2"),
|
| 108 |
+
("f3", "s1"),
|
| 109 |
+
("f3", "s2"),
|
| 110 |
+
]
|
| 111 |
+
)
|
| 112 |
+
df = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx)
|
| 113 |
+
df1 = df.loc(axis=1)[df.columns.map(lambda u: u[0] in ["f2", "f3"])]
|
| 114 |
+
|
| 115 |
+
msg = "DataFrame.groupby with axis=1 is deprecated"
|
| 116 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 117 |
+
grouped = df1.groupby(axis=1, level=0)
|
| 118 |
+
result = grouped.sum()
|
| 119 |
+
assert (result.columns == ["f2", "f3"]).all()
|
| 120 |
+
|
| 121 |
+
def test_setitem_with_expansion_multiindex_columns(
|
| 122 |
+
self, multiindex_year_month_day_dataframe_random_data
|
| 123 |
+
):
|
| 124 |
+
ymd = multiindex_year_month_day_dataframe_random_data
|
| 125 |
+
|
| 126 |
+
df = ymd[:5].T
|
| 127 |
+
df[2000, 1, 10] = df[2000, 1, 7]
|
| 128 |
+
assert isinstance(df.columns, MultiIndex)
|
| 129 |
+
assert (df[2000, 1, 10] == df[2000, 1, 7]).all()
|
| 130 |
+
|
| 131 |
+
def test_alignment(self):
|
| 132 |
+
x = Series(
|
| 133 |
+
data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)])
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
y = Series(
|
| 137 |
+
data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)])
|
| 138 |
+
)
|
| 139 |
+
|
| 140 |
+
res = x - y
|
| 141 |
+
exp_index = x.index.union(y.index)
|
| 142 |
+
exp = x.reindex(exp_index) - y.reindex(exp_index)
|
| 143 |
+
tm.assert_series_equal(res, exp)
|
| 144 |
+
|
| 145 |
+
# hit non-monotonic code path
|
| 146 |
+
res = x[::-1] - y[::-1]
|
| 147 |
+
exp_index = x.index.union(y.index)
|
| 148 |
+
exp = x.reindex(exp_index) - y.reindex(exp_index)
|
| 149 |
+
tm.assert_series_equal(res, exp)
|
| 150 |
+
|
| 151 |
+
def test_groupby_multilevel(self, multiindex_year_month_day_dataframe_random_data):
|
| 152 |
+
ymd = multiindex_year_month_day_dataframe_random_data
|
| 153 |
+
|
| 154 |
+
result = ymd.groupby(level=[0, 1]).mean()
|
| 155 |
+
|
| 156 |
+
k1 = ymd.index.get_level_values(0)
|
| 157 |
+
k2 = ymd.index.get_level_values(1)
|
| 158 |
+
|
| 159 |
+
expected = ymd.groupby([k1, k2]).mean()
|
| 160 |
+
|
| 161 |
+
# TODO groupby with level_values drops names
|
| 162 |
+
tm.assert_frame_equal(result, expected, check_names=False)
|
| 163 |
+
assert result.index.names == ymd.index.names[:2]
|
| 164 |
+
|
| 165 |
+
result2 = ymd.groupby(level=ymd.index.names[:2]).mean()
|
| 166 |
+
tm.assert_frame_equal(result, result2)
|
| 167 |
+
|
| 168 |
+
def test_multilevel_consolidate(self):
|
| 169 |
+
index = MultiIndex.from_tuples(
|
| 170 |
+
[("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")]
|
| 171 |
+
)
|
| 172 |
+
df = DataFrame(
|
| 173 |
+
np.random.default_rng(2).standard_normal((4, 4)), index=index, columns=index
|
| 174 |
+
)
|
| 175 |
+
df["Totals", ""] = df.sum(1)
|
| 176 |
+
df = df._consolidate()
|
| 177 |
+
|
| 178 |
+
def test_level_with_tuples(self):
|
| 179 |
+
index = MultiIndex(
|
| 180 |
+
levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
|
| 181 |
+
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
|
| 182 |
+
)
|
| 183 |
+
|
| 184 |
+
series = Series(np.random.default_rng(2).standard_normal(6), index=index)
|
| 185 |
+
frame = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index)
|
| 186 |
+
|
| 187 |
+
result = series[("foo", "bar", 0)]
|
| 188 |
+
result2 = series.loc[("foo", "bar", 0)]
|
| 189 |
+
expected = series[:2]
|
| 190 |
+
expected.index = expected.index.droplevel(0)
|
| 191 |
+
tm.assert_series_equal(result, expected)
|
| 192 |
+
tm.assert_series_equal(result2, expected)
|
| 193 |
+
|
| 194 |
+
with pytest.raises(KeyError, match=r"^\(\('foo', 'bar', 0\), 2\)$"):
|
| 195 |
+
series[("foo", "bar", 0), 2]
|
| 196 |
+
|
| 197 |
+
result = frame.loc[("foo", "bar", 0)]
|
| 198 |
+
result2 = frame.xs(("foo", "bar", 0))
|
| 199 |
+
expected = frame[:2]
|
| 200 |
+
expected.index = expected.index.droplevel(0)
|
| 201 |
+
tm.assert_frame_equal(result, expected)
|
| 202 |
+
tm.assert_frame_equal(result2, expected)
|
| 203 |
+
|
| 204 |
+
index = MultiIndex(
|
| 205 |
+
levels=[[("foo", "bar"), ("foo", "baz"), ("foo", "qux")], [0, 1]],
|
| 206 |
+
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
|
| 207 |
+
)
|
| 208 |
+
|
| 209 |
+
series = Series(np.random.default_rng(2).standard_normal(6), index=index)
|
| 210 |
+
frame = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index)
|
| 211 |
+
|
| 212 |
+
result = series[("foo", "bar")]
|
| 213 |
+
result2 = series.loc[("foo", "bar")]
|
| 214 |
+
expected = series[:2]
|
| 215 |
+
expected.index = expected.index.droplevel(0)
|
| 216 |
+
tm.assert_series_equal(result, expected)
|
| 217 |
+
tm.assert_series_equal(result2, expected)
|
| 218 |
+
|
| 219 |
+
result = frame.loc[("foo", "bar")]
|
| 220 |
+
result2 = frame.xs(("foo", "bar"))
|
| 221 |
+
expected = frame[:2]
|
| 222 |
+
expected.index = expected.index.droplevel(0)
|
| 223 |
+
tm.assert_frame_equal(result, expected)
|
| 224 |
+
tm.assert_frame_equal(result2, expected)
|
| 225 |
+
|
| 226 |
+
def test_reindex_level_partial_selection(self, multiindex_dataframe_random_data):
|
| 227 |
+
frame = multiindex_dataframe_random_data
|
| 228 |
+
|
| 229 |
+
result = frame.reindex(["foo", "qux"], level=0)
|
| 230 |
+
expected = frame.iloc[[0, 1, 2, 7, 8, 9]]
|
| 231 |
+
tm.assert_frame_equal(result, expected)
|
| 232 |
+
|
| 233 |
+
result = frame.T.reindex(["foo", "qux"], axis=1, level=0)
|
| 234 |
+
tm.assert_frame_equal(result, expected.T)
|
| 235 |
+
|
| 236 |
+
result = frame.loc[["foo", "qux"]]
|
| 237 |
+
tm.assert_frame_equal(result, expected)
|
| 238 |
+
|
| 239 |
+
result = frame["A"].loc[["foo", "qux"]]
|
| 240 |
+
tm.assert_series_equal(result, expected["A"])
|
| 241 |
+
|
| 242 |
+
result = frame.T.loc[:, ["foo", "qux"]]
|
| 243 |
+
tm.assert_frame_equal(result, expected.T)
|
| 244 |
+
|
| 245 |
+
@pytest.mark.parametrize("d", [4, "d"])
|
| 246 |
+
def test_empty_frame_groupby_dtypes_consistency(self, d):
|
| 247 |
+
# GH 20888
|
| 248 |
+
group_keys = ["a", "b", "c"]
|
| 249 |
+
df = DataFrame({"a": [1], "b": [2], "c": [3], "d": [d]})
|
| 250 |
+
|
| 251 |
+
g = df[df.a == 2].groupby(group_keys)
|
| 252 |
+
result = g.first().index
|
| 253 |
+
expected = MultiIndex(
|
| 254 |
+
levels=[[1], [2], [3]], codes=[[], [], []], names=["a", "b", "c"]
|
| 255 |
+
)
|
| 256 |
+
|
| 257 |
+
tm.assert_index_equal(result, expected)
|
| 258 |
+
|
| 259 |
+
def test_duplicate_groupby_issues(self):
|
| 260 |
+
idx_tp = [
|
| 261 |
+
("600809", "20061231"),
|
| 262 |
+
("600809", "20070331"),
|
| 263 |
+
("600809", "20070630"),
|
| 264 |
+
("600809", "20070331"),
|
| 265 |
+
]
|
| 266 |
+
dt = ["demo", "demo", "demo", "demo"]
|
| 267 |
+
|
| 268 |
+
idx = MultiIndex.from_tuples(idx_tp, names=["STK_ID", "RPT_Date"])
|
| 269 |
+
s = Series(dt, index=idx)
|
| 270 |
+
|
| 271 |
+
result = s.groupby(s.index).first()
|
| 272 |
+
assert len(result) == 3
|
| 273 |
+
|
| 274 |
+
def test_subsets_multiindex_dtype(self):
|
| 275 |
+
# GH 20757
|
| 276 |
+
data = [["x", 1]]
|
| 277 |
+
columns = [("a", "b", np.nan), ("a", "c", 0.0)]
|
| 278 |
+
df = DataFrame(data, columns=MultiIndex.from_tuples(columns))
|
| 279 |
+
expected = df.dtypes.a.b
|
| 280 |
+
result = df.a.b.dtypes
|
| 281 |
+
tm.assert_series_equal(result, expected)
|
| 282 |
+
|
| 283 |
+
def test_datetime_object_multiindex(self):
|
| 284 |
+
data_dic = {
|
| 285 |
+
(0, datetime.date(2018, 3, 3)): {"A": 1, "B": 10},
|
| 286 |
+
(0, datetime.date(2018, 3, 4)): {"A": 2, "B": 11},
|
| 287 |
+
(1, datetime.date(2018, 3, 3)): {"A": 3, "B": 12},
|
| 288 |
+
(1, datetime.date(2018, 3, 4)): {"A": 4, "B": 13},
|
| 289 |
+
}
|
| 290 |
+
result = DataFrame.from_dict(data_dic, orient="index")
|
| 291 |
+
data = {"A": [1, 2, 3, 4], "B": [10, 11, 12, 13]}
|
| 292 |
+
index = [
|
| 293 |
+
[0, 0, 1, 1],
|
| 294 |
+
[
|
| 295 |
+
datetime.date(2018, 3, 3),
|
| 296 |
+
datetime.date(2018, 3, 4),
|
| 297 |
+
datetime.date(2018, 3, 3),
|
| 298 |
+
datetime.date(2018, 3, 4),
|
| 299 |
+
],
|
| 300 |
+
]
|
| 301 |
+
expected = DataFrame(data=data, index=index)
|
| 302 |
+
|
| 303 |
+
tm.assert_frame_equal(result, expected)
|
| 304 |
+
|
| 305 |
+
def test_multiindex_with_na(self):
|
| 306 |
+
df = DataFrame(
|
| 307 |
+
[
|
| 308 |
+
["A", np.nan, 1.23, 4.56],
|
| 309 |
+
["A", "G", 1.23, 4.56],
|
| 310 |
+
["A", "D", 9.87, 10.54],
|
| 311 |
+
],
|
| 312 |
+
columns=["pivot_0", "pivot_1", "col_1", "col_2"],
|
| 313 |
+
).set_index(["pivot_0", "pivot_1"])
|
| 314 |
+
|
| 315 |
+
df.at[("A", "F"), "col_2"] = 0.0
|
| 316 |
+
|
| 317 |
+
expected = DataFrame(
|
| 318 |
+
[
|
| 319 |
+
["A", np.nan, 1.23, 4.56],
|
| 320 |
+
["A", "G", 1.23, 4.56],
|
| 321 |
+
["A", "D", 9.87, 10.54],
|
| 322 |
+
["A", "F", np.nan, 0.0],
|
| 323 |
+
],
|
| 324 |
+
columns=["pivot_0", "pivot_1", "col_1", "col_2"],
|
| 325 |
+
).set_index(["pivot_0", "pivot_1"])
|
| 326 |
+
|
| 327 |
+
tm.assert_frame_equal(df, expected)
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
class TestSorted:
|
| 331 |
+
"""everything you wanted to test about sorting"""
|
| 332 |
+
|
| 333 |
+
def test_sort_non_lexsorted(self):
|
| 334 |
+
# degenerate case where we sort but don't
|
| 335 |
+
# have a satisfying result :<
|
| 336 |
+
# GH 15797
|
| 337 |
+
idx = MultiIndex(
|
| 338 |
+
[["A", "B", "C"], ["c", "b", "a"]], [[0, 1, 2, 0, 1, 2], [0, 2, 1, 1, 0, 2]]
|
| 339 |
+
)
|
| 340 |
+
|
| 341 |
+
df = DataFrame({"col": range(len(idx))}, index=idx, dtype="int64")
|
| 342 |
+
assert df.index.is_monotonic_increasing is False
|
| 343 |
+
|
| 344 |
+
sorted = df.sort_index()
|
| 345 |
+
assert sorted.index.is_monotonic_increasing is True
|
| 346 |
+
|
| 347 |
+
expected = DataFrame(
|
| 348 |
+
{"col": [1, 4, 5, 2]},
|
| 349 |
+
index=MultiIndex.from_tuples(
|
| 350 |
+
[("B", "a"), ("B", "c"), ("C", "a"), ("C", "b")]
|
| 351 |
+
),
|
| 352 |
+
dtype="int64",
|
| 353 |
+
)
|
| 354 |
+
result = sorted.loc[pd.IndexSlice["B":"C", "a":"c"], :]
|
| 355 |
+
tm.assert_frame_equal(result, expected)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_nanops.py
ADDED
|
@@ -0,0 +1,1274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import partial
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
import pandas.util._test_decorators as td
|
| 7 |
+
|
| 8 |
+
from pandas.core.dtypes.common import is_integer_dtype
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from pandas import (
|
| 12 |
+
Series,
|
| 13 |
+
isna,
|
| 14 |
+
)
|
| 15 |
+
import pandas._testing as tm
|
| 16 |
+
from pandas.core import nanops
|
| 17 |
+
|
| 18 |
+
use_bn = nanops._USE_BOTTLENECK
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@pytest.fixture
|
| 22 |
+
def disable_bottleneck(monkeypatch):
|
| 23 |
+
with monkeypatch.context() as m:
|
| 24 |
+
m.setattr(nanops, "_USE_BOTTLENECK", False)
|
| 25 |
+
yield
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@pytest.fixture
|
| 29 |
+
def arr_shape():
|
| 30 |
+
return 11, 7
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@pytest.fixture
|
| 34 |
+
def arr_float(arr_shape):
|
| 35 |
+
return np.random.default_rng(2).standard_normal(arr_shape)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@pytest.fixture
|
| 39 |
+
def arr_complex(arr_float):
|
| 40 |
+
return arr_float + arr_float * 1j
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@pytest.fixture
|
| 44 |
+
def arr_int(arr_shape):
|
| 45 |
+
return np.random.default_rng(2).integers(-10, 10, arr_shape)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@pytest.fixture
|
| 49 |
+
def arr_bool(arr_shape):
|
| 50 |
+
return np.random.default_rng(2).integers(0, 2, arr_shape) == 0
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
@pytest.fixture
|
| 54 |
+
def arr_str(arr_float):
|
| 55 |
+
return np.abs(arr_float).astype("S")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@pytest.fixture
|
| 59 |
+
def arr_utf(arr_float):
|
| 60 |
+
return np.abs(arr_float).astype("U")
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
@pytest.fixture
|
| 64 |
+
def arr_date(arr_shape):
|
| 65 |
+
return np.random.default_rng(2).integers(0, 20000, arr_shape).astype("M8[ns]")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@pytest.fixture
|
| 69 |
+
def arr_tdelta(arr_shape):
|
| 70 |
+
return np.random.default_rng(2).integers(0, 20000, arr_shape).astype("m8[ns]")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@pytest.fixture
|
| 74 |
+
def arr_nan(arr_shape):
|
| 75 |
+
return np.tile(np.nan, arr_shape)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
@pytest.fixture
|
| 79 |
+
def arr_float_nan(arr_float, arr_nan):
|
| 80 |
+
return np.vstack([arr_float, arr_nan])
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
@pytest.fixture
|
| 84 |
+
def arr_nan_float1(arr_nan, arr_float):
|
| 85 |
+
return np.vstack([arr_nan, arr_float])
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
@pytest.fixture
|
| 89 |
+
def arr_nan_nan(arr_nan):
|
| 90 |
+
return np.vstack([arr_nan, arr_nan])
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
@pytest.fixture
|
| 94 |
+
def arr_inf(arr_float):
|
| 95 |
+
return arr_float * np.inf
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
@pytest.fixture
|
| 99 |
+
def arr_float_inf(arr_float, arr_inf):
|
| 100 |
+
return np.vstack([arr_float, arr_inf])
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
@pytest.fixture
|
| 104 |
+
def arr_nan_inf(arr_nan, arr_inf):
|
| 105 |
+
return np.vstack([arr_nan, arr_inf])
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@pytest.fixture
|
| 109 |
+
def arr_float_nan_inf(arr_float, arr_nan, arr_inf):
|
| 110 |
+
return np.vstack([arr_float, arr_nan, arr_inf])
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@pytest.fixture
|
| 114 |
+
def arr_nan_nan_inf(arr_nan, arr_inf):
|
| 115 |
+
return np.vstack([arr_nan, arr_nan, arr_inf])
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
@pytest.fixture
|
| 119 |
+
def arr_obj(
|
| 120 |
+
arr_float, arr_int, arr_bool, arr_complex, arr_str, arr_utf, arr_date, arr_tdelta
|
| 121 |
+
):
|
| 122 |
+
return np.vstack(
|
| 123 |
+
[
|
| 124 |
+
arr_float.astype("O"),
|
| 125 |
+
arr_int.astype("O"),
|
| 126 |
+
arr_bool.astype("O"),
|
| 127 |
+
arr_complex.astype("O"),
|
| 128 |
+
arr_str.astype("O"),
|
| 129 |
+
arr_utf.astype("O"),
|
| 130 |
+
arr_date.astype("O"),
|
| 131 |
+
arr_tdelta.astype("O"),
|
| 132 |
+
]
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
@pytest.fixture
|
| 137 |
+
def arr_nan_nanj(arr_nan):
|
| 138 |
+
with np.errstate(invalid="ignore"):
|
| 139 |
+
return arr_nan + arr_nan * 1j
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
@pytest.fixture
|
| 143 |
+
def arr_complex_nan(arr_complex, arr_nan_nanj):
|
| 144 |
+
with np.errstate(invalid="ignore"):
|
| 145 |
+
return np.vstack([arr_complex, arr_nan_nanj])
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
@pytest.fixture
|
| 149 |
+
def arr_nan_infj(arr_inf):
|
| 150 |
+
with np.errstate(invalid="ignore"):
|
| 151 |
+
return arr_inf * 1j
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
@pytest.fixture
|
| 155 |
+
def arr_complex_nan_infj(arr_complex, arr_nan_infj):
|
| 156 |
+
with np.errstate(invalid="ignore"):
|
| 157 |
+
return np.vstack([arr_complex, arr_nan_infj])
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
@pytest.fixture
|
| 161 |
+
def arr_float_1d(arr_float):
|
| 162 |
+
return arr_float[:, 0]
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
@pytest.fixture
|
| 166 |
+
def arr_nan_1d(arr_nan):
|
| 167 |
+
return arr_nan[:, 0]
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
@pytest.fixture
|
| 171 |
+
def arr_float_nan_1d(arr_float_nan):
|
| 172 |
+
return arr_float_nan[:, 0]
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
@pytest.fixture
|
| 176 |
+
def arr_float1_nan_1d(arr_float1_nan):
|
| 177 |
+
return arr_float1_nan[:, 0]
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
@pytest.fixture
|
| 181 |
+
def arr_nan_float1_1d(arr_nan_float1):
|
| 182 |
+
return arr_nan_float1[:, 0]
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
class TestnanopsDataFrame:
|
| 186 |
+
def setup_method(self):
|
| 187 |
+
nanops._USE_BOTTLENECK = False
|
| 188 |
+
|
| 189 |
+
arr_shape = (11, 7)
|
| 190 |
+
|
| 191 |
+
self.arr_float = np.random.default_rng(2).standard_normal(arr_shape)
|
| 192 |
+
self.arr_float1 = np.random.default_rng(2).standard_normal(arr_shape)
|
| 193 |
+
self.arr_complex = self.arr_float + self.arr_float1 * 1j
|
| 194 |
+
self.arr_int = np.random.default_rng(2).integers(-10, 10, arr_shape)
|
| 195 |
+
self.arr_bool = np.random.default_rng(2).integers(0, 2, arr_shape) == 0
|
| 196 |
+
self.arr_str = np.abs(self.arr_float).astype("S")
|
| 197 |
+
self.arr_utf = np.abs(self.arr_float).astype("U")
|
| 198 |
+
self.arr_date = (
|
| 199 |
+
np.random.default_rng(2).integers(0, 20000, arr_shape).astype("M8[ns]")
|
| 200 |
+
)
|
| 201 |
+
self.arr_tdelta = (
|
| 202 |
+
np.random.default_rng(2).integers(0, 20000, arr_shape).astype("m8[ns]")
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
+
self.arr_nan = np.tile(np.nan, arr_shape)
|
| 206 |
+
self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan])
|
| 207 |
+
self.arr_float1_nan = np.vstack([self.arr_float1, self.arr_nan])
|
| 208 |
+
self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1])
|
| 209 |
+
self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan])
|
| 210 |
+
|
| 211 |
+
self.arr_inf = self.arr_float * np.inf
|
| 212 |
+
self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf])
|
| 213 |
+
|
| 214 |
+
self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf])
|
| 215 |
+
self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan, self.arr_inf])
|
| 216 |
+
self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan, self.arr_inf])
|
| 217 |
+
self.arr_obj = np.vstack(
|
| 218 |
+
[
|
| 219 |
+
self.arr_float.astype("O"),
|
| 220 |
+
self.arr_int.astype("O"),
|
| 221 |
+
self.arr_bool.astype("O"),
|
| 222 |
+
self.arr_complex.astype("O"),
|
| 223 |
+
self.arr_str.astype("O"),
|
| 224 |
+
self.arr_utf.astype("O"),
|
| 225 |
+
self.arr_date.astype("O"),
|
| 226 |
+
self.arr_tdelta.astype("O"),
|
| 227 |
+
]
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
with np.errstate(invalid="ignore"):
|
| 231 |
+
self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j
|
| 232 |
+
self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj])
|
| 233 |
+
|
| 234 |
+
self.arr_nan_infj = self.arr_inf * 1j
|
| 235 |
+
self.arr_complex_nan_infj = np.vstack([self.arr_complex, self.arr_nan_infj])
|
| 236 |
+
|
| 237 |
+
self.arr_float_2d = self.arr_float
|
| 238 |
+
self.arr_float1_2d = self.arr_float1
|
| 239 |
+
|
| 240 |
+
self.arr_nan_2d = self.arr_nan
|
| 241 |
+
self.arr_float_nan_2d = self.arr_float_nan
|
| 242 |
+
self.arr_float1_nan_2d = self.arr_float1_nan
|
| 243 |
+
self.arr_nan_float1_2d = self.arr_nan_float1
|
| 244 |
+
|
| 245 |
+
self.arr_float_1d = self.arr_float[:, 0]
|
| 246 |
+
self.arr_float1_1d = self.arr_float1[:, 0]
|
| 247 |
+
|
| 248 |
+
self.arr_nan_1d = self.arr_nan[:, 0]
|
| 249 |
+
self.arr_float_nan_1d = self.arr_float_nan[:, 0]
|
| 250 |
+
self.arr_float1_nan_1d = self.arr_float1_nan[:, 0]
|
| 251 |
+
self.arr_nan_float1_1d = self.arr_nan_float1[:, 0]
|
| 252 |
+
|
| 253 |
+
def teardown_method(self):
|
| 254 |
+
nanops._USE_BOTTLENECK = use_bn
|
| 255 |
+
|
| 256 |
+
def check_results(self, targ, res, axis, check_dtype=True):
|
| 257 |
+
res = getattr(res, "asm8", res)
|
| 258 |
+
|
| 259 |
+
if (
|
| 260 |
+
axis != 0
|
| 261 |
+
and hasattr(targ, "shape")
|
| 262 |
+
and targ.ndim
|
| 263 |
+
and targ.shape != res.shape
|
| 264 |
+
):
|
| 265 |
+
res = np.split(res, [targ.shape[0]], axis=0)[0]
|
| 266 |
+
|
| 267 |
+
try:
|
| 268 |
+
tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
|
| 269 |
+
except AssertionError:
|
| 270 |
+
# handle timedelta dtypes
|
| 271 |
+
if hasattr(targ, "dtype") and targ.dtype == "m8[ns]":
|
| 272 |
+
raise
|
| 273 |
+
|
| 274 |
+
# There are sometimes rounding errors with
|
| 275 |
+
# complex and object dtypes.
|
| 276 |
+
# If it isn't one of those, re-raise the error.
|
| 277 |
+
if not hasattr(res, "dtype") or res.dtype.kind not in ["c", "O"]:
|
| 278 |
+
raise
|
| 279 |
+
# convert object dtypes to something that can be split into
|
| 280 |
+
# real and imaginary parts
|
| 281 |
+
if res.dtype.kind == "O":
|
| 282 |
+
if targ.dtype.kind != "O":
|
| 283 |
+
res = res.astype(targ.dtype)
|
| 284 |
+
else:
|
| 285 |
+
cast_dtype = "c16" if hasattr(np, "complex128") else "f8"
|
| 286 |
+
res = res.astype(cast_dtype)
|
| 287 |
+
targ = targ.astype(cast_dtype)
|
| 288 |
+
# there should never be a case where numpy returns an object
|
| 289 |
+
# but nanops doesn't, so make that an exception
|
| 290 |
+
elif targ.dtype.kind == "O":
|
| 291 |
+
raise
|
| 292 |
+
tm.assert_almost_equal(np.real(targ), np.real(res), check_dtype=check_dtype)
|
| 293 |
+
tm.assert_almost_equal(np.imag(targ), np.imag(res), check_dtype=check_dtype)
|
| 294 |
+
|
| 295 |
+
def check_fun_data(
|
| 296 |
+
self,
|
| 297 |
+
testfunc,
|
| 298 |
+
targfunc,
|
| 299 |
+
testarval,
|
| 300 |
+
targarval,
|
| 301 |
+
skipna,
|
| 302 |
+
check_dtype=True,
|
| 303 |
+
empty_targfunc=None,
|
| 304 |
+
**kwargs,
|
| 305 |
+
):
|
| 306 |
+
for axis in list(range(targarval.ndim)) + [None]:
|
| 307 |
+
targartempval = targarval if skipna else testarval
|
| 308 |
+
if skipna and empty_targfunc and isna(targartempval).all():
|
| 309 |
+
targ = empty_targfunc(targartempval, axis=axis, **kwargs)
|
| 310 |
+
else:
|
| 311 |
+
targ = targfunc(targartempval, axis=axis, **kwargs)
|
| 312 |
+
|
| 313 |
+
if targartempval.dtype == object and (
|
| 314 |
+
targfunc is np.any or targfunc is np.all
|
| 315 |
+
):
|
| 316 |
+
# GH#12863 the numpy functions will retain e.g. floatiness
|
| 317 |
+
if isinstance(targ, np.ndarray):
|
| 318 |
+
targ = targ.astype(bool)
|
| 319 |
+
else:
|
| 320 |
+
targ = bool(targ)
|
| 321 |
+
|
| 322 |
+
res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs)
|
| 323 |
+
|
| 324 |
+
if (
|
| 325 |
+
isinstance(targ, np.complex128)
|
| 326 |
+
and isinstance(res, float)
|
| 327 |
+
and np.isnan(targ)
|
| 328 |
+
and np.isnan(res)
|
| 329 |
+
):
|
| 330 |
+
# GH#18463
|
| 331 |
+
targ = res
|
| 332 |
+
|
| 333 |
+
self.check_results(targ, res, axis, check_dtype=check_dtype)
|
| 334 |
+
if skipna:
|
| 335 |
+
res = testfunc(testarval, axis=axis, **kwargs)
|
| 336 |
+
self.check_results(targ, res, axis, check_dtype=check_dtype)
|
| 337 |
+
if axis is None:
|
| 338 |
+
res = testfunc(testarval, skipna=skipna, **kwargs)
|
| 339 |
+
self.check_results(targ, res, axis, check_dtype=check_dtype)
|
| 340 |
+
if skipna and axis is None:
|
| 341 |
+
res = testfunc(testarval, **kwargs)
|
| 342 |
+
self.check_results(targ, res, axis, check_dtype=check_dtype)
|
| 343 |
+
|
| 344 |
+
if testarval.ndim <= 1:
|
| 345 |
+
return
|
| 346 |
+
|
| 347 |
+
# Recurse on lower-dimension
|
| 348 |
+
testarval2 = np.take(testarval, 0, axis=-1)
|
| 349 |
+
targarval2 = np.take(targarval, 0, axis=-1)
|
| 350 |
+
self.check_fun_data(
|
| 351 |
+
testfunc,
|
| 352 |
+
targfunc,
|
| 353 |
+
testarval2,
|
| 354 |
+
targarval2,
|
| 355 |
+
skipna=skipna,
|
| 356 |
+
check_dtype=check_dtype,
|
| 357 |
+
empty_targfunc=empty_targfunc,
|
| 358 |
+
**kwargs,
|
| 359 |
+
)
|
| 360 |
+
|
| 361 |
+
def check_fun(
|
| 362 |
+
self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs
|
| 363 |
+
):
|
| 364 |
+
targar = testar
|
| 365 |
+
if testar.endswith("_nan") and hasattr(self, testar[:-4]):
|
| 366 |
+
targar = testar[:-4]
|
| 367 |
+
|
| 368 |
+
testarval = getattr(self, testar)
|
| 369 |
+
targarval = getattr(self, targar)
|
| 370 |
+
self.check_fun_data(
|
| 371 |
+
testfunc,
|
| 372 |
+
targfunc,
|
| 373 |
+
testarval,
|
| 374 |
+
targarval,
|
| 375 |
+
skipna=skipna,
|
| 376 |
+
empty_targfunc=empty_targfunc,
|
| 377 |
+
**kwargs,
|
| 378 |
+
)
|
| 379 |
+
|
| 380 |
+
def check_funs(
    self,
    testfunc,
    targfunc,
    skipna,
    allow_complex=True,
    allow_all_nan=True,
    allow_date=True,
    allow_tdelta=True,
    allow_obj=True,
    **kwargs,
):
    """
    Run ``check_fun`` for every array attribute enabled by the ``allow_*`` flags.

    The float, float-with-NaN, int and bool arrays are always checked.  The
    other groups are checked only when their flag is truthy.  Object-dtype
    copies of the checked arrays are stacked into ``self.arr_obj``, which is
    checked last when ``allow_obj`` is truthy; with ``allow_obj == "convert"``
    the target function is first wrapped in ``self._badobj_wrap``.
    """

    def run(name, target=targfunc):
        self.check_fun(testfunc, target, name, skipna, **kwargs)

    for name in ("arr_float", "arr_float_nan", "arr_int", "arr_bool"):
        run(name)
    objs = [
        getattr(self, name).astype("O")
        for name in ("arr_float", "arr_int", "arr_bool")
    ]

    if allow_all_nan:
        run("arr_nan")

    if allow_complex:
        run("arr_complex")
        run("arr_complex_nan")
        if allow_all_nan:
            run("arr_nan_nanj")
        objs.append(self.arr_complex.astype("O"))

    if allow_date:
        # The target function is called once directly; its result is unused.
        targfunc(self.arr_date)
        run("arr_date")
        objs.append(self.arr_date.astype("O"))

    if allow_tdelta:
        try:
            targfunc(self.arr_tdelta)
        except TypeError:
            # Target function cannot handle timedeltas: skip this group.
            pass
        else:
            run("arr_tdelta")
            objs.append(self.arr_tdelta.astype("O"))

    if not allow_obj:
        return

    self.arr_obj = np.vstack(objs)
    # some nanops handle object dtypes better than their numpy
    # counterparts, so the numpy functions need to be given something
    # else
    if allow_obj == "convert":
        targfunc = partial(
            self._badobj_wrap, func=targfunc, allow_complex=allow_complex
        )
    run("arr_obj", targfunc)
|
| 436 |
+
|
| 437 |
+
def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
    """
    Call ``func(value, **kwargs)``, casting object-dtype ``value`` first.

    Object arrays are cast to complex128 (``"c16"``) when ``allow_complex``
    is truthy, otherwise to float64 (``"f8"``); other dtypes pass through.
    """
    if value.dtype.kind == "O":
        value = value.astype("c16" if allow_complex else "f8")
    return func(value, **kwargs)
|
| 444 |
+
|
| 445 |
+
@pytest.mark.parametrize(
    "nan_op,np_op",
    [
        (nanops.nanany, np.any),
        (nanops.nanall, np.all),
    ],
)
def test_nan_funcs(self, nan_op, np_op, skipna):
    """Check nanany/nanall against np.any/np.all, without all-NaN and date arrays."""
    disabled = {"allow_all_nan": False, "allow_date": False}
    self.check_funs(nan_op, np_op, skipna, **disabled)
|
| 450 |
+
|
| 451 |
+
def test_nansum(self, skipna):
    """Check nanops.nansum against np.sum; np.nansum is passed as empty_targfunc."""
    options = {
        "allow_date": False,
        "check_dtype": False,
        "empty_targfunc": np.nansum,
    }
    self.check_funs(nanops.nansum, np.sum, skipna, **options)
|
| 460 |
+
|
| 461 |
+
def test_nanmean(self, skipna):
    """Check nanops.nanmean against np.mean, without object and date arrays."""
    disabled = {"allow_obj": False, "allow_date": False}
    self.check_funs(nanops.nanmean, np.mean, skipna, **disabled)
|
| 465 |
+
|
| 466 |
+
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_nanmedian(self, skipna):
    """Check nanops.nanmedian against np.median (object arrays are converted)."""
    options = {
        "allow_complex": False,
        "allow_date": False,
        "allow_obj": "convert",
    }
    self.check_funs(nanops.nanmedian, np.median, skipna, **options)
|
| 476 |
+
|
| 477 |
+
@pytest.mark.parametrize("ddof", range(3))
def test_nanvar(self, ddof, skipna):
    """Check nanops.nanvar against np.var for each parametrized ``ddof``."""
    options = {
        "allow_complex": False,
        "allow_date": False,
        "allow_obj": "convert",
        "ddof": ddof,
    }
    self.check_funs(nanops.nanvar, np.var, skipna, **options)
|
| 488 |
+
|
| 489 |
+
@pytest.mark.parametrize("ddof", range(3))
def test_nanstd(self, ddof, skipna):
    """Check nanops.nanstd against np.std for each parametrized ``ddof``."""
    options = {
        "allow_complex": False,
        "allow_date": False,
        "allow_obj": "convert",
        "ddof": ddof,
    }
    self.check_funs(nanops.nanstd, np.std, skipna, **options)
|
| 500 |
+
|
| 501 |
+
@pytest.mark.parametrize("ddof", range(3))
def test_nansem(self, ddof, skipna):
    """Check nanops.nansem against scipy.stats.sem; skipped without scipy."""
    sp_stats = pytest.importorskip("scipy.stats")

    options = {
        "allow_complex": False,
        "allow_date": False,
        "allow_tdelta": False,
        "allow_obj": "convert",
        "ddof": ddof,
    }
    # Floating-point "invalid" errors are ignored for the whole comparison.
    with np.errstate(invalid="ignore"):
        self.check_funs(nanops.nansem, sp_stats.sem, skipna, **options)
|
| 516 |
+
|
| 517 |
+
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
@pytest.mark.parametrize(
    "nan_op,np_op",
    [
        (nanops.nanmin, np.min),
        (nanops.nanmax, np.max),
    ],
)
def test_nanops_with_warnings(self, nan_op, np_op, skipna):
    """Check nanmin/nanmax against np.min/np.max, without the object array."""
    disabled = {"allow_obj": False}
    self.check_funs(nan_op, np_op, skipna, **disabled)
|
| 523 |
+
|
| 524 |
+
def _argminmax_wrap(self, value, axis=None, func=None):
    """
    Call ``func(value, axis)`` and replace positions of null slices with -1.

    A slice counts as null when ``isna`` of its ``np.min`` is true.  For an
    array result the matching entries are overwritten in place; for a 0-d
    result, -1 is returned when the null indicator is (all) true.
    """
    positions = func(value, axis)
    null_mask = isna(np.min(value, axis))

    if positions.ndim:
        positions[null_mask] = -1
        return positions

    # ``isna`` may return an object with ``.all()`` or a plain bool.
    is_null = null_mask.all() if hasattr(null_mask, "all") else null_mask
    return -1 if is_null else positions
|
| 538 |
+
|
| 539 |
+
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_nanargmax(self, skipna):
    """Check nanops.nanargmax against np.argmax wrapped by ``_argminmax_wrap``."""
    target = partial(self._argminmax_wrap, func=np.argmax)
    disabled = {"allow_obj": False}
    self.check_funs(nanops.nanargmax, target, skipna, **disabled)
|
| 543 |
+
|
| 544 |
+
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_nanargmin(self, skipna):
    """Check nanops.nanargmin against np.argmin wrapped by ``_argminmax_wrap``."""
    target = partial(self._argminmax_wrap, func=np.argmin)
    disabled = {"allow_obj": False}
    self.check_funs(nanops.nanargmin, target, skipna, **disabled)
|
| 548 |
+
|
| 549 |
+
def _skew_kurt_wrap(self, values, axis=None, func=None):
    """
    Call ``func(values, axis=axis, bias=False)`` and zero out constant slices.

    Non-floating input is cast to float64 before the call.  Wherever the
    maximum equals the minimum along ``axis`` the result is forced to 0:
    array results are patched in place, and a non-array result is replaced
    by ``0.0`` when the whole input is constant.
    """
    # ``values.dtype.type`` is a class, so an ``isinstance`` check against
    # ``np.floating`` is never true; compare the dtype itself instead.
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype("f8")
    result = func(values, axis=axis, bias=False)
    # fix for handling cases where all elements in an axis are the same
    if isinstance(result, np.ndarray):
        result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
        return result
    elif np.max(values) == np.min(values):
        return 0.0
    return result
|
| 560 |
+
|
| 561 |
+
def test_nanskew(self, skipna):
    """Check nanops.nanskew against scipy.stats.skew; skipped without scipy."""
    sp_stats = pytest.importorskip("scipy.stats")

    target = partial(self._skew_kurt_wrap, func=sp_stats.skew)
    disabled = {
        "allow_complex": False,
        "allow_date": False,
        "allow_tdelta": False,
    }
    with np.errstate(invalid="ignore"):
        self.check_funs(nanops.nanskew, target, skipna, **disabled)
|
| 574 |
+
|
| 575 |
+
def test_nankurt(self, skipna):
    """Check nanops.nankurt against scipy.stats.kurtosis (fisher=True)."""
    sp_stats = pytest.importorskip("scipy.stats")

    fisher_kurtosis = partial(sp_stats.kurtosis, fisher=True)
    target = partial(self._skew_kurt_wrap, func=fisher_kurtosis)
    disabled = {
        "allow_complex": False,
        "allow_date": False,
        "allow_tdelta": False,
    }
    with np.errstate(invalid="ignore"):
        self.check_funs(nanops.nankurt, target, skipna, **disabled)
|
| 589 |
+
|
| 590 |
+
def test_nanprod(self, skipna):
    """Check nanops.nanprod against np.prod; np.nanprod is passed as empty_targfunc."""
    options = {
        "allow_date": False,
        "allow_tdelta": False,
        "empty_targfunc": np.nanprod,
    }
    self.check_funs(nanops.nanprod, np.prod, skipna, **options)
|
| 599 |
+
|
| 600 |
+
def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
    """
    Compare ``checkfun`` on the 2-D float arrays with the expected targets.

    ``targ0`` is expected for the plain float pair, ``targ1`` for the pair
    containing NaNs, and NaN for every combination involving an all-NaN
    array, the ``arr_nan_float1_2d`` array, or a ``min_periods`` larger than
    ``len(self.arr_float_2d)``.  Each group's calls are all made before any
    of its results are asserted.
    """

    def expect(target, results):
        for res in results:
            tm.assert_almost_equal(target, res)

    plain = [
        checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs),
        checkfun(
            self.arr_float_2d,
            self.arr_float1_2d,
            min_periods=len(self.arr_float_2d) - 1,
            **kwargs,
        ),
    ]
    expect(targ0, plain)

    with_nans = [
        checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs),
        checkfun(
            self.arr_float_nan_2d,
            self.arr_float1_nan_2d,
            min_periods=len(self.arr_float_2d) - 1,
            **kwargs,
        ),
    ]
    expect(targ1, with_nans)

    undefined = [
        checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs),
        checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs),
        checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs),
        checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs),
        checkfun(
            self.arr_float_nan_2d,
            self.arr_nan_float1_2d,
            min_periods=len(self.arr_float_2d) - 1,
            **kwargs,
        ),
        checkfun(
            self.arr_float_2d,
            self.arr_float1_2d,
            min_periods=len(self.arr_float_2d) + 1,
            **kwargs,
        ),
    ]
    expect(np.nan, undefined)
|
| 644 |
+
|
| 645 |
+
def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs):
    """
    Compare ``checkfun`` on the 1-D float arrays with the expected targets.

    ``targ0`` is expected for the plain float pair, ``targ1`` for the pair
    containing NaNs, and NaN for every combination involving an all-NaN
    array, the ``arr_nan_float1_1d`` array, or a ``min_periods`` larger than
    ``len(self.arr_float_1d)``.  Each group's calls are all made before any
    of its results are asserted.
    """

    def expect(target, results):
        for res in results:
            tm.assert_almost_equal(target, res)

    plain = [
        checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs),
        checkfun(
            self.arr_float_1d,
            self.arr_float1_1d,
            min_periods=len(self.arr_float_1d) - 1,
            **kwargs,
        ),
    ]
    expect(targ0, plain)

    with_nans = [
        checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs),
        checkfun(
            self.arr_float_nan_1d,
            self.arr_float1_nan_1d,
            min_periods=len(self.arr_float_1d) - 1,
            **kwargs,
        ),
    ]
    expect(targ1, with_nans)

    undefined = [
        checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs),
        checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs),
        checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs),
        checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs),
        checkfun(
            self.arr_float_nan_1d,
            self.arr_nan_float1_1d,
            min_periods=len(self.arr_float_1d) - 1,
            **kwargs,
        ),
        checkfun(
            self.arr_float_1d,
            self.arr_float1_1d,
            min_periods=len(self.arr_float_1d) + 1,
            **kwargs,
        ),
    ]
    expect(np.nan, undefined)
|
| 689 |
+
|
| 690 |
+
def test_nancorr(self):
    """
    Check nanops.nancorr with its default method against np.corrcoef.

    Both the 2-D and the 1-D checks deliberately pass no ``method`` so the
    default is exercised; the explicit ``method="pearson"`` spelling is
    covered by ``test_nancorr_pearson``.
    """
    targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
    targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
    self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1)
    targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
    targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
    self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1)
|
| 697 |
+
|
| 698 |
+
def test_nancorr_pearson(self):
    """Check nanops.nancorr(method="pearson") against np.corrcoef in 2-D and 1-D."""

    def pearson(left, right):
        return np.corrcoef(left, right)[0, 1]

    expected = pearson(self.arr_float_2d, self.arr_float1_2d)
    expected_flat = pearson(self.arr_float_2d.flat, self.arr_float1_2d.flat)
    self.check_nancorr_nancov_2d(
        nanops.nancorr, expected, expected_flat, method="pearson"
    )

    expected = pearson(self.arr_float_1d, self.arr_float1_1d)
    expected_flat = pearson(self.arr_float_1d.flat, self.arr_float1_1d.flat)
    self.check_nancorr_nancov_1d(
        nanops.nancorr, expected, expected_flat, method="pearson"
    )
|
| 705 |
+
|
| 706 |
+
def test_nancorr_kendall(self):
    """Check nanops.nancorr(method="kendall") against scipy.stats.kendalltau."""
    sp_stats = pytest.importorskip("scipy.stats")

    def tau(left, right):
        # First element of the kendalltau result is used as the target.
        return sp_stats.kendalltau(left, right)[0]

    expected = tau(self.arr_float_2d, self.arr_float1_2d)
    expected_flat = tau(self.arr_float_2d.flat, self.arr_float1_2d.flat)
    self.check_nancorr_nancov_2d(
        nanops.nancorr, expected, expected_flat, method="kendall"
    )

    expected = tau(self.arr_float_1d, self.arr_float1_1d)
    expected_flat = tau(self.arr_float_1d.flat, self.arr_float1_1d.flat)
    self.check_nancorr_nancov_1d(
        nanops.nancorr, expected, expected_flat, method="kendall"
    )
|
| 715 |
+
|
| 716 |
+
def test_nancorr_spearman(self):
    """Check nanops.nancorr(method="spearman") against scipy.stats.spearmanr."""
    sp_stats = pytest.importorskip("scipy.stats")

    def rho(left, right):
        # First element of the spearmanr result is used as the target.
        return sp_stats.spearmanr(left, right)[0]

    expected = rho(self.arr_float_2d, self.arr_float1_2d)
    expected_flat = rho(self.arr_float_2d.flat, self.arr_float1_2d.flat)
    self.check_nancorr_nancov_2d(
        nanops.nancorr, expected, expected_flat, method="spearman"
    )

    expected = rho(self.arr_float_1d, self.arr_float1_1d)
    expected_flat = rho(self.arr_float_1d.flat, self.arr_float1_1d.flat)
    self.check_nancorr_nancov_1d(
        nanops.nancorr, expected, expected_flat, method="spearman"
    )
|
| 725 |
+
|
| 726 |
+
def test_invalid_method(self):
    """An unknown ``method`` passed through to nanops.nancorr raises ValueError."""
    pytest.importorskip("scipy")
    expected = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
    expected_flat = np.corrcoef(
        self.arr_float_2d.flat, self.arr_float1_2d.flat
    )[0, 1]
    msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'"
    with pytest.raises(ValueError, match=msg):
        self.check_nancorr_nancov_1d(
            nanops.nancorr, expected, expected_flat, method="foo"
        )
|
| 733 |
+
|
| 734 |
+
def test_nancov(self):
    """Check nanops.nancov against the off-diagonal entry of np.cov in 2-D and 1-D."""

    def cov(left, right):
        return np.cov(left, right)[0, 1]

    expected = cov(self.arr_float_2d, self.arr_float1_2d)
    expected_flat = cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)
    self.check_nancorr_nancov_2d(nanops.nancov, expected, expected_flat)

    expected = cov(self.arr_float_1d, self.arr_float1_1d)
    expected_flat = cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)
    self.check_nancorr_nancov_1d(nanops.nancov, expected, expected_flat)
|
| 741 |
+
|
| 742 |
+
|
| 743 |
+
@pytest.mark.parametrize(
    "arr, correct",
    [
        # Each fixture is listed once; a repeated ("arr_complex", False)
        # entry only re-ran the identical case.
        ("arr_complex", False),
        ("arr_int", False),
        ("arr_bool", False),
        ("arr_str", False),
        ("arr_utf", False),
        ("arr_complex_nan", False),
        ("arr_nan_nanj", False),
        ("arr_nan_infj", True),
        ("arr_complex_nan_infj", True),
    ],
)
def test_has_infs_non_float(request, arr, correct, disable_bottleneck):
    """
    Check ``nanops._has_infs`` on the named fixture at every dimensionality.

    The fixture value is checked, then repeatedly reduced by taking index 0
    along the last axis, until ``ndim`` is falsy.
    """
    val = request.getfixturevalue(arr)
    while getattr(val, "ndim", True):
        res0 = nanops._has_infs(val)
        if correct:
            assert res0
        else:
            assert not res0

        # A value without ``ndim`` is checked once and cannot be reduced.
        if not hasattr(val, "ndim"):
            break

        # Reduce dimension for next step in the loop
        val = np.take(val, 0, axis=-1)
|
| 772 |
+
|
| 773 |
+
|
| 774 |
+
@pytest.mark.parametrize(
    "arr, correct",
    [
        ("arr_float", False),
        ("arr_nan", False),
        ("arr_float_nan", False),
        ("arr_nan_nan", False),
        ("arr_float_inf", True),
        ("arr_inf", True),
        ("arr_nan_inf", True),
        ("arr_float_nan_inf", True),
        ("arr_nan_nan_inf", True),
    ],
)
@pytest.mark.parametrize("astype", [None, "f4", "f2"])
def test_has_infs_floats(request, arr, correct, astype, disable_bottleneck):
    """
    Check ``nanops._has_infs`` on float fixtures, optionally cast to f4/f2.

    The value is checked, then repeatedly reduced by taking index 0 along
    the last axis, until ``ndim`` is falsy.
    """
    current = request.getfixturevalue(arr)
    if astype is not None:
        current = current.astype(astype)

    while getattr(current, "ndim", True):
        found = nanops._has_infs(current)
        assert bool(found) is correct

        if not hasattr(current, "ndim"):
            # Nothing to reduce: the value was checked exactly once.
            break

        # Drop the last axis for the next iteration.
        current = np.take(current, 0, axis=-1)
|
| 805 |
+
|
| 806 |
+
|
| 807 |
+
@pytest.mark.parametrize(
    "fixture",
    [
        "arr_float",
        "arr_complex",
        "arr_int",
        "arr_bool",
        "arr_str",
        "arr_utf",
    ],
)
def test_bn_ok_dtype(fixture, request, disable_bottleneck):
    """``nanops._bn_ok_dtype`` is truthy for the dtype of each listed fixture."""
    dtype = request.getfixturevalue(fixture).dtype
    assert nanops._bn_ok_dtype(dtype, "test")
|
| 813 |
+
|
| 814 |
+
|
| 815 |
+
@pytest.mark.parametrize("fixture", ["arr_date", "arr_tdelta", "arr_obj"])
def test_bn_not_ok_dtype(fixture, request, disable_bottleneck):
    """``nanops._bn_ok_dtype`` is falsy for date, timedelta and object fixtures."""
    dtype = request.getfixturevalue(fixture).dtype
    assert not nanops._bn_ok_dtype(dtype, "test")
|
| 826 |
+
|
| 827 |
+
|
| 828 |
+
class TestEnsureNumeric:
    """Tests for ``nanops._ensure_numeric`` on scalars, arrays and strings."""

    def test_numeric_values(self):
        # int, float and complex scalars compare equal to themselves afterwards
        for number in (1, 1.1, 1 + 2j):
            assert nanops._ensure_numeric(number) == number

    def test_ndarray(self):
        numbers = np.array([1, 2, 3])

        # Numeric ndarray, then the same data as an object ndarray
        for candidate in (numbers, numbers.astype(object)):
            assert np.allclose(nanops._ensure_numeric(candidate), numbers)

        # Object arrays of strings raise, whether or not the strings look numeric
        failing = [
            (["1", "2", "3"], r"Could not convert \['1' '2' '3'\] to numeric"),
            (["foo", "bar", "baz"], r"Could not convert .* to numeric"),
        ]
        for strings, msg in failing:
            s_values = np.array(strings, dtype=object)
            with pytest.raises(TypeError, match=msg):
                nanops._ensure_numeric(s_values)

    def test_convertable_values(self):
        # Numeric-looking strings are rejected with a TypeError
        cases = [
            ("1", "Could not convert string '1' to numeric"),
            ("1.1", "Could not convert string '1.1' to numeric"),
            ("1+1j", r"Could not convert string '1\+1j' to numeric"),
        ]
        for text, msg in cases:
            with pytest.raises(TypeError, match=msg):
                nanops._ensure_numeric(text)

    def test_non_convertable_values(self):
        with pytest.raises(
            TypeError, match="Could not convert string 'foo' to numeric"
        ):
            nanops._ensure_numeric("foo")

        # with the wrong type, python raises TypeError for us
        msg = "argument must be a string or a number"
        for wrong_type in ({}, []):
            with pytest.raises(TypeError, match=msg):
                nanops._ensure_numeric(wrong_type)
|
| 883 |
+
|
| 884 |
+
|
| 885 |
+
class TestNanvarFixedValues:
    """Fixed-value tests for ``nanops.nanvar`` and ``nanops.nanstd``."""

    # xref GH10242
    # Samples from a normal distribution.
    @pytest.fixture
    def variance(self):
        """Variance used to scale the normal samples."""
        return 3.0

    @pytest.fixture
    def samples(self, variance):
        """100000 normal draws with standard deviation ``variance**0.5``."""
        return self.prng.normal(scale=variance**0.5, size=100000)

    def test_nanvar_all_finite(self, samples, variance):
        actual_variance = nanops.nanvar(samples)
        tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)

    def test_nanvar_nans(self, samples, variance):
        # Interleave the samples with NaN: even slots hold data, odd slots NaN.
        samples_test = np.nan * np.ones(2 * samples.shape[0])
        samples_test[::2] = samples

        actual_variance = nanops.nanvar(samples_test, skipna=True)
        tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)

        actual_variance = nanops.nanvar(samples_test, skipna=False)
        tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2)

    def test_nanstd_nans(self, samples, variance):
        # Interleave the samples with NaN: even slots hold data, odd slots NaN.
        samples_test = np.nan * np.ones(2 * samples.shape[0])
        samples_test[::2] = samples

        actual_std = nanops.nanstd(samples_test, skipna=True)
        tm.assert_almost_equal(actual_std, variance**0.5, rtol=1e-2)

        # This is the nanstd test, so the skipna=False check uses nanstd too.
        actual_std = nanops.nanstd(samples_test, skipna=False)
        tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2)

    def test_nanvar_axis(self, samples, variance):
        # Generate some sample data.
        samples_unif = self.prng.uniform(size=samples.shape[0])
        samples = np.vstack([samples, samples_unif])

        actual_variance = nanops.nanvar(samples, axis=1)
        tm.assert_almost_equal(
            actual_variance, np.array([variance, 1.0 / 12]), rtol=1e-2
        )

    def test_nanvar_ddof(self):
        n = 5
        samples = self.prng.uniform(size=(10000, n + 1))
        samples[:, -1] = np.nan  # Force use of our own algorithm.

        variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean()
        variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean()
        variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean()

        # The unbiased estimate.
        var = 1.0 / 12
        tm.assert_almost_equal(variance_1, var, rtol=1e-2)

        # The underestimated variance.
        tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, rtol=1e-2)

        # The overestimated variance.
        tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, rtol=1e-2)

    @pytest.mark.parametrize("axis", range(2))
    @pytest.mark.parametrize("ddof", range(3))
    def test_ground_truth(self, axis, ddof):
        # Test against values that were precomputed with Numpy.
        samples = np.empty((4, 4))
        samples[:3, :3] = np.array(
            [
                [0.97303362, 0.21869576, 0.55560287],
                [0.72980153, 0.03109364, 0.99155171],
                [0.09317602, 0.60078248, 0.15871292],
            ]
        )
        # Last row and last column are NaN.
        samples[3] = samples[:, 3] = np.nan

        # Actual variances along axis=0, 1 for ddof=0, 1, 2
        variance = np.array(
            [
                [
                    [0.13762259, 0.05619224, 0.11568816],
                    [0.20643388, 0.08428837, 0.17353224],
                    [0.41286776, 0.16857673, 0.34706449],
                ],
                [
                    [0.09519783, 0.16435395, 0.05082054],
                    [0.14279674, 0.24653093, 0.07623082],
                    [0.28559348, 0.49306186, 0.15246163],
                ],
            ]
        )

        # Test nanvar.
        var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof)
        tm.assert_almost_equal(var[:3], variance[axis, ddof])
        assert np.isnan(var[3])

        # Test nanstd.
        std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof)
        tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
        assert np.isnan(std[3])

    @pytest.mark.parametrize("ddof", range(3))
    def test_nanstd_roundoff(self, ddof):
        # Regression test for GH 10242 (test data taken from GH 10489). Ensure
        # that variance is stable.
        data = Series(766897346 * np.ones(10))
        result = data.std(ddof=ddof)
        assert result == 0.0

    @property
    def prng(self):
        # A new generator with seed 2 is created on every access.
        return np.random.default_rng(2)
|
| 1000 |
+
|
| 1001 |
+
|
| 1002 |
+
class TestNanskewFixedValues:
    """Fixed-value tests for ``nanops.nanskew``."""

    # xref GH 11974
    # Test data + skewness value (computed with scipy.stats.skew)
    @pytest.fixture
    def samples(self):
        grid = np.linspace(0, 1, 200)
        return np.sin(grid)

    @pytest.fixture
    def actual_skew(self):
        return -0.1875895205961754

    @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
    def test_constant_series(self, val):
        # xref GH 11974
        constant = val * np.ones(300)
        assert nanops.nanskew(constant) == 0.0

    def test_all_finite(self):
        # Each ``self.prng`` access returns a new generator.
        left_tailed = self.prng.beta(0.3, 0.1, size=100)
        assert nanops.nanskew(left_tailed) < 0

        right_tailed = self.prng.beta(0.1, 0.3, size=100)
        assert nanops.nanskew(right_tailed) > 0

    def test_ground_truth(self, samples, actual_skew):
        tm.assert_almost_equal(nanops.nanskew(samples), actual_skew)

    def test_axis(self, samples, actual_skew):
        # Second row is all NaN, so its skew is expected to be NaN.
        nan_row = np.nan * np.ones(len(samples))
        stacked = np.vstack([samples, nan_row])
        row_skew = nanops.nanskew(stacked, axis=1)
        tm.assert_almost_equal(row_skew, np.array([actual_skew, np.nan]))

    def test_nans(self, samples):
        with_nan = np.hstack([samples, np.nan])
        assert np.isnan(nanops.nanskew(with_nan, skipna=False))

    def test_nans_skipna(self, samples, actual_skew):
        with_nan = np.hstack([samples, np.nan])
        tm.assert_almost_equal(nanops.nanskew(with_nan, skipna=True), actual_skew)

    @property
    def prng(self):
        seed = 2
        return np.random.default_rng(seed)
|
| 1051 |
+
|
| 1052 |
+
|
| 1053 |
+
class TestNankurtFixedValues:
    """Fixed-value tests for ``nanops.nankurt``."""

    # xref GH 11974
    # Test data + kurtosis value (computed with scipy.stats.kurtosis)
    @pytest.fixture
    def samples(self):
        grid = np.linspace(0, 1, 200)
        return np.sin(grid)

    @pytest.fixture
    def actual_kurt(self):
        return -1.2058303433799713

    @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
    def test_constant_series(self, val):
        # xref GH 11974
        constant = val * np.ones(300)
        assert nanops.nankurt(constant) == 0.0

    def test_all_finite(self):
        # Each ``self.prng`` access returns a new generator.
        left_tailed = self.prng.beta(0.3, 0.1, size=100)
        assert nanops.nankurt(left_tailed) < 2

        right_tailed = self.prng.beta(0.1, 0.3, size=100)
        assert nanops.nankurt(right_tailed) < 0

    def test_ground_truth(self, samples, actual_kurt):
        tm.assert_almost_equal(nanops.nankurt(samples), actual_kurt)

    def test_axis(self, samples, actual_kurt):
        # Second row is all NaN, so its kurtosis is expected to be NaN.
        nan_row = np.nan * np.ones(len(samples))
        stacked = np.vstack([samples, nan_row])
        row_kurt = nanops.nankurt(stacked, axis=1)
        tm.assert_almost_equal(row_kurt, np.array([actual_kurt, np.nan]))

    def test_nans(self, samples):
        with_nan = np.hstack([samples, np.nan])
        assert np.isnan(nanops.nankurt(with_nan, skipna=False))

    def test_nans_skipna(self, samples, actual_kurt):
        with_nan = np.hstack([samples, np.nan])
        tm.assert_almost_equal(nanops.nankurt(with_nan, skipna=True), actual_kurt)

    @property
    def prng(self):
        seed = 2
        return np.random.default_rng(seed)
|
| 1102 |
+
|
| 1103 |
+
|
| 1104 |
+
class TestDatetime64NaNOps:
    """Tests for ``nanops.nanmean`` on datetime64/timedelta64 data."""

    @pytest.fixture(params=["s", "ms", "us", "ns"])
    def unit(self, request):
        return request.param

    # Enabling mean changes the behavior of DataFrame.mean
    # See https://github.com/pandas-dev/pandas/issues/24752
    def test_nanmean(self, unit):
        dti = pd.date_range("2016-01-01", periods=3).as_unit(unit)
        middle = dti[1]

        # Same expectation with and without a NaT inserted at position 1,
        # and for both the index and its underlying ``_data``.
        for index in (dti, dti.insert(1, pd.NaT)):
            for obj in (index, index._data):
                assert nanops.nanmean(obj) == middle

    @pytest.mark.parametrize("constructor", ["M8", "m8"])
    def test_nanmean_skipna_false(self, constructor, unit):
        dtype = f"{constructor}[{unit}]"
        arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)

        arr[-1, -1] = "NaT"

        # Full reduction: the single NaT makes the result NaT.
        overall = nanops.nanmean(arr, skipna=False)
        assert np.isnat(overall)
        assert overall.dtype == dtype

        # Reduction down the rows: only the last column contains the NaT.
        by_column = nanops.nanmean(arr, axis=0, skipna=False)
        tm.assert_numpy_array_equal(
            by_column, np.array([4, 5, "NaT"], dtype=arr.dtype)
        )

        # Reduction across the columns: middle entry per row, NaT for the last.
        by_row = nanops.nanmean(arr, axis=1, skipna=False)
        row_expected = [arr[i, 1] for i in range(3)] + [arr[-1, -1]]
        tm.assert_numpy_array_equal(by_row, np.array(row_expected))
|
| 1143 |
+
|
| 1144 |
+
|
| 1145 |
+
def test_use_bottleneck():
    """When bottleneck is installed, the ``use_bottleneck`` option round-trips."""
    if not nanops._BOTTLENECK_INSTALLED:
        return

    for flag in (True, False):
        with pd.option_context("use_bottleneck", flag):
            assert bool(pd.get_option("use_bottleneck")) is flag
|
| 1152 |
+
|
| 1153 |
+
|
| 1154 |
+
@pytest.mark.parametrize(
    "numpy_op, expected",
    [
        (np.sum, 10),
        (np.nansum, 10),
        (np.mean, 2.5),
        (np.nanmean, 2.5),
        (np.median, 2.5),
        (np.nanmedian, 2.5),
        (np.min, 1),
        (np.max, 4),
        (np.nanmin, 1),
        (np.nanmax, 4),
    ],
)
def test_numpy_ops(numpy_op, expected):
    """NumPy reductions applied directly to a Series give the expected scalar."""
    # GH8383
    ser = Series([1, 2, 3, 4])
    assert numpy_op(ser) == expected
|
| 1173 |
+
|
| 1174 |
+
|
| 1175 |
+
@pytest.mark.parametrize(
|
| 1176 |
+
"operation",
|
| 1177 |
+
[
|
| 1178 |
+
nanops.nanany,
|
| 1179 |
+
nanops.nanall,
|
| 1180 |
+
nanops.nansum,
|
| 1181 |
+
nanops.nanmean,
|
| 1182 |
+
nanops.nanmedian,
|
| 1183 |
+
nanops.nanstd,
|
| 1184 |
+
nanops.nanvar,
|
| 1185 |
+
nanops.nansem,
|
| 1186 |
+
nanops.nanargmax,
|
| 1187 |
+
nanops.nanargmin,
|
| 1188 |
+
nanops.nanmax,
|
| 1189 |
+
nanops.nanmin,
|
| 1190 |
+
nanops.nanskew,
|
| 1191 |
+
nanops.nankurt,
|
| 1192 |
+
nanops.nanprod,
|
| 1193 |
+
],
|
| 1194 |
+
)
|
| 1195 |
+
def test_nanops_independent_of_mask_param(operation):
|
| 1196 |
+
# GH22764
|
| 1197 |
+
ser = Series([1, 2, np.nan, 3, np.nan, 4])
|
| 1198 |
+
mask = ser.isna()
|
| 1199 |
+
median_expected = operation(ser._values)
|
| 1200 |
+
median_result = operation(ser._values, mask=mask)
|
| 1201 |
+
assert median_expected == median_result
|
| 1202 |
+
|
| 1203 |
+
|
| 1204 |
+
@pytest.mark.parametrize("min_count", [-1, 0])
|
| 1205 |
+
def test_check_below_min_count_negative_or_zero_min_count(min_count):
|
| 1206 |
+
# GH35227
|
| 1207 |
+
result = nanops.check_below_min_count((21, 37), None, min_count)
|
| 1208 |
+
expected_result = False
|
| 1209 |
+
assert result == expected_result
|
| 1210 |
+
|
| 1211 |
+
|
| 1212 |
+
@pytest.mark.parametrize(
|
| 1213 |
+
"mask", [None, np.array([False, False, True]), np.array([True] + 9 * [False])]
|
| 1214 |
+
)
|
| 1215 |
+
@pytest.mark.parametrize("min_count, expected_result", [(1, False), (101, True)])
|
| 1216 |
+
def test_check_below_min_count_positive_min_count(mask, min_count, expected_result):
|
| 1217 |
+
# GH35227
|
| 1218 |
+
shape = (10, 10)
|
| 1219 |
+
result = nanops.check_below_min_count(shape, mask, min_count)
|
| 1220 |
+
assert result == expected_result
|
| 1221 |
+
|
| 1222 |
+
|
| 1223 |
+
@td.skip_if_windows
|
| 1224 |
+
@td.skip_if_32bit
|
| 1225 |
+
@pytest.mark.parametrize("min_count, expected_result", [(1, False), (2812191852, True)])
|
| 1226 |
+
def test_check_below_min_count_large_shape(min_count, expected_result):
|
| 1227 |
+
# GH35227 large shape used to show that the issue is fixed
|
| 1228 |
+
shape = (2244367, 1253)
|
| 1229 |
+
result = nanops.check_below_min_count(shape, mask=None, min_count=min_count)
|
| 1230 |
+
assert result == expected_result
|
| 1231 |
+
|
| 1232 |
+
|
| 1233 |
+
@pytest.mark.parametrize("func", ["nanmean", "nansum"])
|
| 1234 |
+
def test_check_bottleneck_disallow(any_real_numpy_dtype, func):
|
| 1235 |
+
# GH 42878 bottleneck sometimes produces unreliable results for mean and sum
|
| 1236 |
+
assert not nanops._bn_ok_dtype(np.dtype(any_real_numpy_dtype).type, func)
|
| 1237 |
+
|
| 1238 |
+
|
| 1239 |
+
@pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
|
| 1240 |
+
def test_nanmean_overflow(disable_bottleneck, val):
|
| 1241 |
+
# GH 10155
|
| 1242 |
+
# In the previous implementation mean can overflow for int dtypes, it
|
| 1243 |
+
# is now consistent with numpy
|
| 1244 |
+
|
| 1245 |
+
ser = Series(val, index=range(500), dtype=np.int64)
|
| 1246 |
+
result = ser.mean()
|
| 1247 |
+
np_result = ser.values.mean()
|
| 1248 |
+
assert result == val
|
| 1249 |
+
assert result == np_result
|
| 1250 |
+
assert result.dtype == np.float64
|
| 1251 |
+
|
| 1252 |
+
|
| 1253 |
+
@pytest.mark.parametrize(
|
| 1254 |
+
"dtype",
|
| 1255 |
+
[
|
| 1256 |
+
np.int16,
|
| 1257 |
+
np.int32,
|
| 1258 |
+
np.int64,
|
| 1259 |
+
np.float32,
|
| 1260 |
+
np.float64,
|
| 1261 |
+
getattr(np, "float128", None),
|
| 1262 |
+
],
|
| 1263 |
+
)
|
| 1264 |
+
@pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"])
|
| 1265 |
+
def test_returned_dtype(disable_bottleneck, dtype, method):
|
| 1266 |
+
if dtype is None:
|
| 1267 |
+
pytest.skip("np.float128 not available")
|
| 1268 |
+
|
| 1269 |
+
ser = Series(range(10), dtype=dtype)
|
| 1270 |
+
result = getattr(ser, method)()
|
| 1271 |
+
if is_integer_dtype(dtype) and method not in ["min", "max"]:
|
| 1272 |
+
assert result.dtype == np.float64
|
| 1273 |
+
else:
|
| 1274 |
+
assert result.dtype == dtype
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_optional_dependency.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import types
|
| 3 |
+
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas.compat._optional import (
|
| 7 |
+
VERSIONS,
|
| 8 |
+
import_optional_dependency,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test_import_optional():
|
| 15 |
+
match = "Missing .*notapackage.* pip .* conda .* notapackage"
|
| 16 |
+
with pytest.raises(ImportError, match=match) as exc_info:
|
| 17 |
+
import_optional_dependency("notapackage")
|
| 18 |
+
# The original exception should be there as context:
|
| 19 |
+
assert isinstance(exc_info.value.__context__, ImportError)
|
| 20 |
+
|
| 21 |
+
result = import_optional_dependency("notapackage", errors="ignore")
|
| 22 |
+
assert result is None
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def test_xlrd_version_fallback():
|
| 26 |
+
pytest.importorskip("xlrd")
|
| 27 |
+
import_optional_dependency("xlrd")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def test_bad_version(monkeypatch):
|
| 31 |
+
name = "fakemodule"
|
| 32 |
+
module = types.ModuleType(name)
|
| 33 |
+
module.__version__ = "0.9.0"
|
| 34 |
+
sys.modules[name] = module
|
| 35 |
+
monkeypatch.setitem(VERSIONS, name, "1.0.0")
|
| 36 |
+
|
| 37 |
+
match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'"
|
| 38 |
+
with pytest.raises(ImportError, match=match):
|
| 39 |
+
import_optional_dependency("fakemodule")
|
| 40 |
+
|
| 41 |
+
# Test min_version parameter
|
| 42 |
+
result = import_optional_dependency("fakemodule", min_version="0.8")
|
| 43 |
+
assert result is module
|
| 44 |
+
|
| 45 |
+
with tm.assert_produces_warning(UserWarning):
|
| 46 |
+
result = import_optional_dependency("fakemodule", errors="warn")
|
| 47 |
+
assert result is None
|
| 48 |
+
|
| 49 |
+
module.__version__ = "1.0.0" # exact match is OK
|
| 50 |
+
result = import_optional_dependency("fakemodule")
|
| 51 |
+
assert result is module
|
| 52 |
+
|
| 53 |
+
with pytest.raises(ImportError, match="Pandas requires version '1.1.0'"):
|
| 54 |
+
import_optional_dependency("fakemodule", min_version="1.1.0")
|
| 55 |
+
|
| 56 |
+
with tm.assert_produces_warning(UserWarning):
|
| 57 |
+
result = import_optional_dependency(
|
| 58 |
+
"fakemodule", errors="warn", min_version="1.1.0"
|
| 59 |
+
)
|
| 60 |
+
assert result is None
|
| 61 |
+
|
| 62 |
+
result = import_optional_dependency(
|
| 63 |
+
"fakemodule", errors="ignore", min_version="1.1.0"
|
| 64 |
+
)
|
| 65 |
+
assert result is None
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def test_submodule(monkeypatch):
|
| 69 |
+
# Create a fake module with a submodule
|
| 70 |
+
name = "fakemodule"
|
| 71 |
+
module = types.ModuleType(name)
|
| 72 |
+
module.__version__ = "0.9.0"
|
| 73 |
+
sys.modules[name] = module
|
| 74 |
+
sub_name = "submodule"
|
| 75 |
+
submodule = types.ModuleType(sub_name)
|
| 76 |
+
setattr(module, sub_name, submodule)
|
| 77 |
+
sys.modules[f"{name}.{sub_name}"] = submodule
|
| 78 |
+
monkeypatch.setitem(VERSIONS, name, "1.0.0")
|
| 79 |
+
|
| 80 |
+
match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'"
|
| 81 |
+
with pytest.raises(ImportError, match=match):
|
| 82 |
+
import_optional_dependency("fakemodule.submodule")
|
| 83 |
+
|
| 84 |
+
with tm.assert_produces_warning(UserWarning):
|
| 85 |
+
result = import_optional_dependency("fakemodule.submodule", errors="warn")
|
| 86 |
+
assert result is None
|
| 87 |
+
|
| 88 |
+
module.__version__ = "1.0.0" # exact match is OK
|
| 89 |
+
result = import_optional_dependency("fakemodule.submodule")
|
| 90 |
+
assert result is submodule
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def test_no_version_raises(monkeypatch):
|
| 94 |
+
name = "fakemodule"
|
| 95 |
+
module = types.ModuleType(name)
|
| 96 |
+
sys.modules[name] = module
|
| 97 |
+
monkeypatch.setitem(VERSIONS, name, "1.0.0")
|
| 98 |
+
|
| 99 |
+
with pytest.raises(ImportError, match="Can't determine .* fakemodule"):
|
| 100 |
+
import_optional_dependency(name)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_register_accessor.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections.abc import Generator
|
| 2 |
+
import contextlib
|
| 3 |
+
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import pandas._testing as tm
|
| 8 |
+
from pandas.core import accessor
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def test_dirname_mixin() -> None:
|
| 12 |
+
# GH37173
|
| 13 |
+
|
| 14 |
+
class X(accessor.DirNamesMixin):
|
| 15 |
+
x = 1
|
| 16 |
+
y: int
|
| 17 |
+
|
| 18 |
+
def __init__(self) -> None:
|
| 19 |
+
self.z = 3
|
| 20 |
+
|
| 21 |
+
result = [attr_name for attr_name in dir(X()) if not attr_name.startswith("_")]
|
| 22 |
+
|
| 23 |
+
assert result == ["x", "z"]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@contextlib.contextmanager
|
| 27 |
+
def ensure_removed(obj, attr) -> Generator[None, None, None]:
|
| 28 |
+
"""Ensure that an attribute added to 'obj' during the test is
|
| 29 |
+
removed when we're done
|
| 30 |
+
"""
|
| 31 |
+
try:
|
| 32 |
+
yield
|
| 33 |
+
finally:
|
| 34 |
+
try:
|
| 35 |
+
delattr(obj, attr)
|
| 36 |
+
except AttributeError:
|
| 37 |
+
pass
|
| 38 |
+
obj._accessors.discard(attr)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class MyAccessor:
|
| 42 |
+
def __init__(self, obj) -> None:
|
| 43 |
+
self.obj = obj
|
| 44 |
+
self.item = "item"
|
| 45 |
+
|
| 46 |
+
@property
|
| 47 |
+
def prop(self):
|
| 48 |
+
return self.item
|
| 49 |
+
|
| 50 |
+
def method(self):
|
| 51 |
+
return self.item
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@pytest.mark.parametrize(
|
| 55 |
+
"obj, registrar",
|
| 56 |
+
[
|
| 57 |
+
(pd.Series, pd.api.extensions.register_series_accessor),
|
| 58 |
+
(pd.DataFrame, pd.api.extensions.register_dataframe_accessor),
|
| 59 |
+
(pd.Index, pd.api.extensions.register_index_accessor),
|
| 60 |
+
],
|
| 61 |
+
)
|
| 62 |
+
def test_register(obj, registrar):
|
| 63 |
+
with ensure_removed(obj, "mine"):
|
| 64 |
+
before = set(dir(obj))
|
| 65 |
+
registrar("mine")(MyAccessor)
|
| 66 |
+
o = obj([]) if obj is not pd.Series else obj([], dtype=object)
|
| 67 |
+
assert o.mine.prop == "item"
|
| 68 |
+
after = set(dir(obj))
|
| 69 |
+
assert (before ^ after) == {"mine"}
|
| 70 |
+
assert "mine" in obj._accessors
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def test_accessor_works():
|
| 74 |
+
with ensure_removed(pd.Series, "mine"):
|
| 75 |
+
pd.api.extensions.register_series_accessor("mine")(MyAccessor)
|
| 76 |
+
|
| 77 |
+
s = pd.Series([1, 2])
|
| 78 |
+
assert s.mine.obj is s
|
| 79 |
+
|
| 80 |
+
assert s.mine.prop == "item"
|
| 81 |
+
assert s.mine.method() == "item"
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def test_overwrite_warns():
|
| 85 |
+
match = r".*MyAccessor.*fake.*Series.*"
|
| 86 |
+
with tm.assert_produces_warning(UserWarning, match=match):
|
| 87 |
+
with ensure_removed(pd.Series, "fake"):
|
| 88 |
+
setattr(pd.Series, "fake", 123)
|
| 89 |
+
pd.api.extensions.register_series_accessor("fake")(MyAccessor)
|
| 90 |
+
s = pd.Series([1, 2])
|
| 91 |
+
assert s.fake.prop == "item"
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def test_raises_attribute_error():
|
| 95 |
+
with ensure_removed(pd.Series, "bad"):
|
| 96 |
+
|
| 97 |
+
@pd.api.extensions.register_series_accessor("bad")
|
| 98 |
+
class Bad:
|
| 99 |
+
def __init__(self, data) -> None:
|
| 100 |
+
raise AttributeError("whoops")
|
| 101 |
+
|
| 102 |
+
with pytest.raises(AttributeError, match="whoops"):
|
| 103 |
+
pd.Series([], dtype=object).bad
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_sorting.py
ADDED
|
@@ -0,0 +1,487 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
from itertools import product
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pytest
|
| 7 |
+
|
| 8 |
+
from pandas import (
|
| 9 |
+
NA,
|
| 10 |
+
DataFrame,
|
| 11 |
+
MultiIndex,
|
| 12 |
+
Series,
|
| 13 |
+
array,
|
| 14 |
+
concat,
|
| 15 |
+
merge,
|
| 16 |
+
)
|
| 17 |
+
import pandas._testing as tm
|
| 18 |
+
from pandas.core.algorithms import safe_sort
|
| 19 |
+
import pandas.core.common as com
|
| 20 |
+
from pandas.core.sorting import (
|
| 21 |
+
_decons_group_index,
|
| 22 |
+
get_group_index,
|
| 23 |
+
is_int64_overflow_possible,
|
| 24 |
+
lexsort_indexer,
|
| 25 |
+
nargsort,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@pytest.fixture
|
| 30 |
+
def left_right():
|
| 31 |
+
low, high, n = -1 << 10, 1 << 10, 1 << 20
|
| 32 |
+
left = DataFrame(
|
| 33 |
+
np.random.default_rng(2).integers(low, high, (n, 7)), columns=list("ABCDEFG")
|
| 34 |
+
)
|
| 35 |
+
left["left"] = left.sum(axis=1)
|
| 36 |
+
|
| 37 |
+
# one-2-one match
|
| 38 |
+
i = np.random.default_rng(2).permutation(len(left))
|
| 39 |
+
right = left.iloc[i].copy()
|
| 40 |
+
right.columns = right.columns[:-1].tolist() + ["right"]
|
| 41 |
+
right.index = np.arange(len(right))
|
| 42 |
+
right["right"] *= -1
|
| 43 |
+
return left, right
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class TestSorting:
|
| 47 |
+
@pytest.mark.slow
|
| 48 |
+
def test_int64_overflow(self):
|
| 49 |
+
B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500)))
|
| 50 |
+
A = np.arange(2500)
|
| 51 |
+
df = DataFrame(
|
| 52 |
+
{
|
| 53 |
+
"A": A,
|
| 54 |
+
"B": B,
|
| 55 |
+
"C": A,
|
| 56 |
+
"D": B,
|
| 57 |
+
"E": A,
|
| 58 |
+
"F": B,
|
| 59 |
+
"G": A,
|
| 60 |
+
"H": B,
|
| 61 |
+
"values": np.random.default_rng(2).standard_normal(2500),
|
| 62 |
+
}
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
lg = df.groupby(["A", "B", "C", "D", "E", "F", "G", "H"])
|
| 66 |
+
rg = df.groupby(["H", "G", "F", "E", "D", "C", "B", "A"])
|
| 67 |
+
|
| 68 |
+
left = lg.sum()["values"]
|
| 69 |
+
right = rg.sum()["values"]
|
| 70 |
+
|
| 71 |
+
exp_index, _ = left.index.sortlevel()
|
| 72 |
+
tm.assert_index_equal(left.index, exp_index)
|
| 73 |
+
|
| 74 |
+
exp_index, _ = right.index.sortlevel(0)
|
| 75 |
+
tm.assert_index_equal(right.index, exp_index)
|
| 76 |
+
|
| 77 |
+
tups = list(map(tuple, df[["A", "B", "C", "D", "E", "F", "G", "H"]].values))
|
| 78 |
+
tups = com.asarray_tuplesafe(tups)
|
| 79 |
+
|
| 80 |
+
expected = df.groupby(tups).sum()["values"]
|
| 81 |
+
|
| 82 |
+
for k, v in expected.items():
|
| 83 |
+
assert left[k] == right[k[::-1]]
|
| 84 |
+
assert left[k] == v
|
| 85 |
+
assert len(left) == len(right)
|
| 86 |
+
|
| 87 |
+
def test_int64_overflow_groupby_large_range(self):
|
| 88 |
+
# GH9096
|
| 89 |
+
values = range(55109)
|
| 90 |
+
data = DataFrame.from_dict({"a": values, "b": values, "c": values, "d": values})
|
| 91 |
+
grouped = data.groupby(["a", "b", "c", "d"])
|
| 92 |
+
assert len(grouped) == len(values)
|
| 93 |
+
|
| 94 |
+
@pytest.mark.parametrize("agg", ["mean", "median"])
|
| 95 |
+
def test_int64_overflow_groupby_large_df_shuffled(self, agg):
|
| 96 |
+
rs = np.random.default_rng(2)
|
| 97 |
+
arr = rs.integers(-1 << 12, 1 << 12, (1 << 15, 5))
|
| 98 |
+
i = rs.choice(len(arr), len(arr) * 4)
|
| 99 |
+
arr = np.vstack((arr, arr[i])) # add some duplicate rows
|
| 100 |
+
|
| 101 |
+
i = rs.permutation(len(arr))
|
| 102 |
+
arr = arr[i] # shuffle rows
|
| 103 |
+
|
| 104 |
+
df = DataFrame(arr, columns=list("abcde"))
|
| 105 |
+
df["jim"], df["joe"] = np.zeros((2, len(df)))
|
| 106 |
+
gr = df.groupby(list("abcde"))
|
| 107 |
+
|
| 108 |
+
# verify this is testing what it is supposed to test!
|
| 109 |
+
assert is_int64_overflow_possible(gr._grouper.shape)
|
| 110 |
+
|
| 111 |
+
mi = MultiIndex.from_arrays(
|
| 112 |
+
[ar.ravel() for ar in np.array_split(np.unique(arr, axis=0), 5, axis=1)],
|
| 113 |
+
names=list("abcde"),
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
res = DataFrame(
|
| 117 |
+
np.zeros((len(mi), 2)), columns=["jim", "joe"], index=mi
|
| 118 |
+
).sort_index()
|
| 119 |
+
|
| 120 |
+
tm.assert_frame_equal(getattr(gr, agg)(), res)
|
| 121 |
+
|
| 122 |
+
@pytest.mark.parametrize(
|
| 123 |
+
"order, na_position, exp",
|
| 124 |
+
[
|
| 125 |
+
[
|
| 126 |
+
True,
|
| 127 |
+
"last",
|
| 128 |
+
list(range(5, 105)) + list(range(5)) + list(range(105, 110)),
|
| 129 |
+
],
|
| 130 |
+
[
|
| 131 |
+
True,
|
| 132 |
+
"first",
|
| 133 |
+
list(range(5)) + list(range(105, 110)) + list(range(5, 105)),
|
| 134 |
+
],
|
| 135 |
+
[
|
| 136 |
+
False,
|
| 137 |
+
"last",
|
| 138 |
+
list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)),
|
| 139 |
+
],
|
| 140 |
+
[
|
| 141 |
+
False,
|
| 142 |
+
"first",
|
| 143 |
+
list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)),
|
| 144 |
+
],
|
| 145 |
+
],
|
| 146 |
+
)
|
| 147 |
+
def test_lexsort_indexer(self, order, na_position, exp):
|
| 148 |
+
keys = [[np.nan] * 5 + list(range(100)) + [np.nan] * 5]
|
| 149 |
+
result = lexsort_indexer(keys, orders=order, na_position=na_position)
|
| 150 |
+
tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp))
|
| 151 |
+
|
| 152 |
+
@pytest.mark.parametrize(
|
| 153 |
+
"ascending, na_position, exp",
|
| 154 |
+
[
|
| 155 |
+
[
|
| 156 |
+
True,
|
| 157 |
+
"last",
|
| 158 |
+
list(range(5, 105)) + list(range(5)) + list(range(105, 110)),
|
| 159 |
+
],
|
| 160 |
+
[
|
| 161 |
+
True,
|
| 162 |
+
"first",
|
| 163 |
+
list(range(5)) + list(range(105, 110)) + list(range(5, 105)),
|
| 164 |
+
],
|
| 165 |
+
[
|
| 166 |
+
False,
|
| 167 |
+
"last",
|
| 168 |
+
list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)),
|
| 169 |
+
],
|
| 170 |
+
[
|
| 171 |
+
False,
|
| 172 |
+
"first",
|
| 173 |
+
list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)),
|
| 174 |
+
],
|
| 175 |
+
],
|
| 176 |
+
)
|
| 177 |
+
def test_nargsort(self, ascending, na_position, exp):
|
| 178 |
+
# list places NaNs last, np.array(..., dtype="O") may not place NaNs first
|
| 179 |
+
items = np.array([np.nan] * 5 + list(range(100)) + [np.nan] * 5, dtype="O")
|
| 180 |
+
|
| 181 |
+
# mergesort is the most difficult to get right because we want it to be
|
| 182 |
+
# stable.
|
| 183 |
+
|
| 184 |
+
# According to numpy/core/tests/test_multiarray, """The number of
|
| 185 |
+
# sorted items must be greater than ~50 to check the actual algorithm
|
| 186 |
+
# because quick and merge sort fall over to insertion sort for small
|
| 187 |
+
# arrays."""
|
| 188 |
+
|
| 189 |
+
result = nargsort(
|
| 190 |
+
items, kind="mergesort", ascending=ascending, na_position=na_position
|
| 191 |
+
)
|
| 192 |
+
tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
class TestMerge:
|
| 196 |
+
def test_int64_overflow_outer_merge(self):
|
| 197 |
+
# #2690, combinatorial explosion
|
| 198 |
+
df1 = DataFrame(
|
| 199 |
+
np.random.default_rng(2).standard_normal((1000, 7)),
|
| 200 |
+
columns=list("ABCDEF") + ["G1"],
|
| 201 |
+
)
|
| 202 |
+
df2 = DataFrame(
|
| 203 |
+
np.random.default_rng(3).standard_normal((1000, 7)),
|
| 204 |
+
columns=list("ABCDEF") + ["G2"],
|
| 205 |
+
)
|
| 206 |
+
result = merge(df1, df2, how="outer")
|
| 207 |
+
assert len(result) == 2000
|
| 208 |
+
|
| 209 |
+
@pytest.mark.slow
|
| 210 |
+
def test_int64_overflow_check_sum_col(self, left_right):
|
| 211 |
+
left, right = left_right
|
| 212 |
+
|
| 213 |
+
out = merge(left, right, how="outer")
|
| 214 |
+
assert len(out) == len(left)
|
| 215 |
+
tm.assert_series_equal(out["left"], -out["right"], check_names=False)
|
| 216 |
+
result = out.iloc[:, :-2].sum(axis=1)
|
| 217 |
+
tm.assert_series_equal(out["left"], result, check_names=False)
|
| 218 |
+
assert result.name is None
|
| 219 |
+
|
| 220 |
+
@pytest.mark.slow
|
| 221 |
+
@pytest.mark.parametrize("how", ["left", "right", "outer", "inner"])
|
| 222 |
+
def test_int64_overflow_how_merge(self, left_right, how):
|
| 223 |
+
left, right = left_right
|
| 224 |
+
|
| 225 |
+
out = merge(left, right, how="outer")
|
| 226 |
+
out.sort_values(out.columns.tolist(), inplace=True)
|
| 227 |
+
out.index = np.arange(len(out))
|
| 228 |
+
tm.assert_frame_equal(out, merge(left, right, how=how, sort=True))
|
| 229 |
+
|
| 230 |
+
@pytest.mark.slow
|
| 231 |
+
def test_int64_overflow_sort_false_order(self, left_right):
|
| 232 |
+
left, right = left_right
|
| 233 |
+
|
| 234 |
+
# check that left merge w/ sort=False maintains left frame order
|
| 235 |
+
out = merge(left, right, how="left", sort=False)
|
| 236 |
+
tm.assert_frame_equal(left, out[left.columns.tolist()])
|
| 237 |
+
|
| 238 |
+
out = merge(right, left, how="left", sort=False)
|
| 239 |
+
tm.assert_frame_equal(right, out[right.columns.tolist()])
|
| 240 |
+
|
| 241 |
+
@pytest.mark.slow
|
| 242 |
+
@pytest.mark.parametrize("how", ["left", "right", "outer", "inner"])
|
| 243 |
+
@pytest.mark.parametrize("sort", [True, False])
|
| 244 |
+
def test_int64_overflow_one_to_many_none_match(self, how, sort):
|
| 245 |
+
# one-2-many/none match
|
| 246 |
+
low, high, n = -1 << 10, 1 << 10, 1 << 11
|
| 247 |
+
left = DataFrame(
|
| 248 |
+
np.random.default_rng(2).integers(low, high, (n, 7)).astype("int64"),
|
| 249 |
+
columns=list("ABCDEFG"),
|
| 250 |
+
)
|
| 251 |
+
|
| 252 |
+
# confirm that this is checking what it is supposed to check
|
| 253 |
+
shape = left.apply(Series.nunique).values
|
| 254 |
+
assert is_int64_overflow_possible(shape)
|
| 255 |
+
|
| 256 |
+
# add duplicates to left frame
|
| 257 |
+
left = concat([left, left], ignore_index=True)
|
| 258 |
+
|
| 259 |
+
right = DataFrame(
|
| 260 |
+
np.random.default_rng(3).integers(low, high, (n // 2, 7)).astype("int64"),
|
| 261 |
+
columns=list("ABCDEFG"),
|
| 262 |
+
)
|
| 263 |
+
|
| 264 |
+
# add duplicates & overlap with left to the right frame
|
| 265 |
+
i = np.random.default_rng(4).choice(len(left), n)
|
| 266 |
+
right = concat([right, right, left.iloc[i]], ignore_index=True)
|
| 267 |
+
|
| 268 |
+
left["left"] = np.random.default_rng(2).standard_normal(len(left))
|
| 269 |
+
right["right"] = np.random.default_rng(2).standard_normal(len(right))
|
| 270 |
+
|
| 271 |
+
# shuffle left & right frames
|
| 272 |
+
i = np.random.default_rng(5).permutation(len(left))
|
| 273 |
+
left = left.iloc[i].copy()
|
| 274 |
+
left.index = np.arange(len(left))
|
| 275 |
+
|
| 276 |
+
i = np.random.default_rng(6).permutation(len(right))
|
| 277 |
+
right = right.iloc[i].copy()
|
| 278 |
+
right.index = np.arange(len(right))
|
| 279 |
+
|
| 280 |
+
# manually compute outer merge
|
| 281 |
+
ldict, rdict = defaultdict(list), defaultdict(list)
|
| 282 |
+
|
| 283 |
+
for idx, row in left.set_index(list("ABCDEFG")).iterrows():
|
| 284 |
+
ldict[idx].append(row["left"])
|
| 285 |
+
|
| 286 |
+
for idx, row in right.set_index(list("ABCDEFG")).iterrows():
|
| 287 |
+
rdict[idx].append(row["right"])
|
| 288 |
+
|
| 289 |
+
vals = []
|
| 290 |
+
for k, lval in ldict.items():
|
| 291 |
+
rval = rdict.get(k, [np.nan])
|
| 292 |
+
for lv, rv in product(lval, rval):
|
| 293 |
+
vals.append(
|
| 294 |
+
k
|
| 295 |
+
+ (
|
| 296 |
+
lv,
|
| 297 |
+
rv,
|
| 298 |
+
)
|
| 299 |
+
)
|
| 300 |
+
|
| 301 |
+
for k, rval in rdict.items():
|
| 302 |
+
if k not in ldict:
|
| 303 |
+
vals.extend(
|
| 304 |
+
k
|
| 305 |
+
+ (
|
| 306 |
+
np.nan,
|
| 307 |
+
rv,
|
| 308 |
+
)
|
| 309 |
+
for rv in rval
|
| 310 |
+
)
|
| 311 |
+
|
| 312 |
+
def align(df):
|
| 313 |
+
df = df.sort_values(df.columns.tolist())
|
| 314 |
+
df.index = np.arange(len(df))
|
| 315 |
+
return df
|
| 316 |
+
|
| 317 |
+
out = DataFrame(vals, columns=list("ABCDEFG") + ["left", "right"])
|
| 318 |
+
out = align(out)
|
| 319 |
+
|
| 320 |
+
jmask = {
|
| 321 |
+
"left": out["left"].notna(),
|
| 322 |
+
"right": out["right"].notna(),
|
| 323 |
+
"inner": out["left"].notna() & out["right"].notna(),
|
| 324 |
+
"outer": np.ones(len(out), dtype="bool"),
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
mask = jmask[how]
|
| 328 |
+
frame = align(out[mask].copy())
|
| 329 |
+
assert mask.all() ^ mask.any() or how == "outer"
|
| 330 |
+
|
| 331 |
+
res = merge(left, right, how=how, sort=sort)
|
| 332 |
+
if sort:
|
| 333 |
+
kcols = list("ABCDEFG")
|
| 334 |
+
tm.assert_frame_equal(
|
| 335 |
+
res[kcols].copy(), res[kcols].sort_values(kcols, kind="mergesort")
|
| 336 |
+
)
|
| 337 |
+
|
| 338 |
+
# as in GH9092 dtypes break with outer/right join
|
| 339 |
+
# 2021-12-18: dtype does not break anymore
|
| 340 |
+
tm.assert_frame_equal(frame, align(res))
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
@pytest.mark.parametrize(
|
| 344 |
+
"codes_list, shape",
|
| 345 |
+
[
|
| 346 |
+
[
|
| 347 |
+
[
|
| 348 |
+
np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100).astype(np.int64),
|
| 349 |
+
np.tile([0, 2, 4, 3, 0, 1, 2, 3], 100).astype(np.int64),
|
| 350 |
+
np.tile([5, 1, 0, 2, 3, 0, 5, 4], 100).astype(np.int64),
|
| 351 |
+
],
|
| 352 |
+
(4, 5, 6),
|
| 353 |
+
],
|
| 354 |
+
[
|
| 355 |
+
[
|
| 356 |
+
np.tile(np.arange(10000, dtype=np.int64), 5),
|
| 357 |
+
np.tile(np.arange(10000, dtype=np.int64), 5),
|
| 358 |
+
],
|
| 359 |
+
(10000, 10000),
|
| 360 |
+
],
|
| 361 |
+
],
|
| 362 |
+
)
|
| 363 |
+
def test_decons(codes_list, shape):
|
| 364 |
+
group_index = get_group_index(codes_list, shape, sort=True, xnull=True)
|
| 365 |
+
codes_list2 = _decons_group_index(group_index, shape)
|
| 366 |
+
|
| 367 |
+
for a, b in zip(codes_list, codes_list2):
|
| 368 |
+
tm.assert_numpy_array_equal(a, b)
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
class TestSafeSort:
|
| 372 |
+
@pytest.mark.parametrize(
|
| 373 |
+
"arg, exp",
|
| 374 |
+
[
|
| 375 |
+
[[3, 1, 2, 0, 4], [0, 1, 2, 3, 4]],
|
| 376 |
+
[
|
| 377 |
+
np.array(list("baaacb"), dtype=object),
|
| 378 |
+
np.array(list("aaabbc"), dtype=object),
|
| 379 |
+
],
|
| 380 |
+
[[], []],
|
| 381 |
+
],
|
| 382 |
+
)
|
| 383 |
+
def test_basic_sort(self, arg, exp):
|
| 384 |
+
result = safe_sort(np.array(arg))
|
| 385 |
+
expected = np.array(exp)
|
| 386 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 387 |
+
|
| 388 |
+
@pytest.mark.parametrize("verify", [True, False])
|
| 389 |
+
@pytest.mark.parametrize(
|
| 390 |
+
"codes, exp_codes",
|
| 391 |
+
[
|
| 392 |
+
[[0, 1, 1, 2, 3, 0, -1, 4], [3, 1, 1, 2, 0, 3, -1, 4]],
|
| 393 |
+
[[], []],
|
| 394 |
+
],
|
| 395 |
+
)
|
| 396 |
+
def test_codes(self, verify, codes, exp_codes):
|
| 397 |
+
values = np.array([3, 1, 2, 0, 4])
|
| 398 |
+
expected = np.array([0, 1, 2, 3, 4])
|
| 399 |
+
|
| 400 |
+
result, result_codes = safe_sort(
|
| 401 |
+
values, codes, use_na_sentinel=True, verify=verify
|
| 402 |
+
)
|
| 403 |
+
expected_codes = np.array(exp_codes, dtype=np.intp)
|
| 404 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 405 |
+
tm.assert_numpy_array_equal(result_codes, expected_codes)
|
| 406 |
+
|
| 407 |
+
def test_codes_out_of_bound(self):
|
| 408 |
+
values = np.array([3, 1, 2, 0, 4])
|
| 409 |
+
expected = np.array([0, 1, 2, 3, 4])
|
| 410 |
+
|
| 411 |
+
# out of bound indices
|
| 412 |
+
codes = [0, 101, 102, 2, 3, 0, 99, 4]
|
| 413 |
+
result, result_codes = safe_sort(values, codes, use_na_sentinel=True)
|
| 414 |
+
expected_codes = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp)
|
| 415 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 416 |
+
tm.assert_numpy_array_equal(result_codes, expected_codes)
|
| 417 |
+
|
| 418 |
+
def test_mixed_integer(self):
|
| 419 |
+
values = np.array(["b", 1, 0, "a", 0, "b"], dtype=object)
|
| 420 |
+
result = safe_sort(values)
|
| 421 |
+
expected = np.array([0, 0, 1, "a", "b", "b"], dtype=object)
|
| 422 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 423 |
+
|
| 424 |
+
def test_mixed_integer_with_codes(self):
|
| 425 |
+
values = np.array(["b", 1, 0, "a"], dtype=object)
|
| 426 |
+
codes = [0, 1, 2, 3, 0, -1, 1]
|
| 427 |
+
result, result_codes = safe_sort(values, codes)
|
| 428 |
+
expected = np.array([0, 1, "a", "b"], dtype=object)
|
| 429 |
+
expected_codes = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp)
|
| 430 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 431 |
+
tm.assert_numpy_array_equal(result_codes, expected_codes)
|
| 432 |
+
|
| 433 |
+
def test_unsortable(self):
    """An object array mixing ints with a datetime raises TypeError (GH 13714)."""
    unorderable = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
    with pytest.raises(
        TypeError, match="'[<>]' not supported between instances of .*"
    ):
        safe_sort(unorderable)
|
| 439 |
+
|
| 440 |
+
@pytest.mark.parametrize(
    "arg, codes, err, msg",
    [
        (1, None, TypeError, "Only np.ndarray, ExtensionArray, and Index"),
        (np.array([0, 1, 2]), 1, TypeError, "Only list-like objects or None"),
        (np.array([0, 1, 2, 1]), [0, 1], ValueError, "values should be unique"),
    ],
)
def test_exceptions(self, arg, codes, err, msg):
    """Each invalid ``values``/``codes`` combination raises the listed error."""
    with pytest.raises(err, match=msg):
        safe_sort(values=arg, codes=codes)
|
| 451 |
+
|
| 452 |
+
@pytest.mark.parametrize(
    "arg, exp",
    [
        ([1, 3, 2], [1, 2, 3]),
        ([1, 3, np.nan, 2], [1, 2, 3, np.nan]),
    ],
)
def test_extension_array(self, arg, exp):
    """An Int64 array is sorted, with the missing entry expected last."""
    unsorted = array(arg, dtype="Int64")
    want = array(exp, dtype="Int64")
    tm.assert_extension_array_equal(safe_sort(unsorted), want)
|
| 460 |
+
|
| 461 |
+
@pytest.mark.parametrize("verify", [True, False])
def test_extension_array_codes(self, verify):
    """Codes follow an Int64 array's sort order with and without ``verify``."""
    unsorted = array([1, 3, 2], dtype="Int64")

    sorted_values, new_codes = safe_sort(
        unsorted, [0, 1, -1, 2], use_na_sentinel=True, verify=verify
    )

    tm.assert_extension_array_equal(sorted_values, array([1, 2, 3], dtype="Int64"))
    tm.assert_numpy_array_equal(new_codes, np.array([0, 2, -1, 1], dtype=np.intp))
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
def test_mixed_str_null(nulls_fixture):
|
| 472 |
+
values = np.array(["b", nulls_fixture, "a", "b"], dtype=object)
|
| 473 |
+
result = safe_sort(values)
|
| 474 |
+
expected = np.array(["a", "b", "b", nulls_fixture], dtype=object)
|
| 475 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
def test_safe_sort_multiindex():
    """safe_sort on a MultiIndex whose first level is Int64 with NA (GH#48412)."""
    first_level = Series([2, 1, NA, NA], dtype="Int64")
    second_level = [2, 1, 3, 3]
    unsorted = MultiIndex.from_arrays([first_level, second_level])

    want = MultiIndex.from_arrays(
        [Series([1, 2, NA, NA], dtype="Int64"), [1, 2, 3, 3]]
    )
    tm.assert_index_equal(safe_sort(unsorted), want)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_take.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas._libs import iNaT
|
| 7 |
+
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
import pandas.core.algorithms as algos
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Each entry is (input dtype, fill value, dtype expected after filling).
_DTYPE_FILL_OUT_DTYPE_CASES = [
    # int8 input
    (np.int8, np.int16(127), np.int8),
    (np.int8, np.int16(128), np.int16),
    # int32 input
    (np.int32, 1, np.int32),
    (np.int32, 2.0, np.float64),
    (np.int32, 3.0 + 4.0j, np.complex128),
    (np.int32, True, np.object_),
    (np.int32, "", np.object_),
    # float64 input
    (np.float64, 1, np.float64),
    (np.float64, 2.0, np.float64),
    (np.float64, 3.0 + 4.0j, np.complex128),
    (np.float64, True, np.object_),
    (np.float64, "", np.object_),
    # complex128 input
    (np.complex128, 1, np.complex128),
    (np.complex128, 2.0, np.complex128),
    (np.complex128, 3.0 + 4.0j, np.complex128),
    (np.complex128, True, np.object_),
    (np.complex128, "", np.object_),
    # bool input
    (np.bool_, 1, np.object_),
    (np.bool_, 2.0, np.object_),
    (np.bool_, 3.0 + 4.0j, np.object_),
    (np.bool_, True, np.bool_),
    (np.bool_, "", np.object_),
]


@pytest.fixture(params=_DTYPE_FILL_OUT_DTYPE_CASES)
def dtype_fill_out_dtype(request):
    """Return the current ``(dtype, fill_value, out_dtype)`` parameter triple."""
    return request.param
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class TestTake:
    """Tests for ``algos.take_nd`` and ``algos.take`` on NumPy arrays."""

    def test_1d_fill_nonna(self, dtype_fill_out_dtype):
        """1-D take_nd with an explicit fill value, with and without a -1."""
        dtype, fill_value, out_dtype = dtype_fill_out_dtype
        data = np.random.default_rng(2).integers(0, 2, 4).astype(dtype)

        # The trailing -1 asks for a filled slot.
        with_missing = [2, 1, 0, -1]
        taken = algos.take_nd(data, with_missing, fill_value=fill_value)
        assert (taken[[0, 1, 2]] == data[[2, 1, 0]]).all()
        assert taken[3] == fill_value
        assert taken.dtype == out_dtype

        # No -1 in the indexer: the input dtype is expected back.
        no_missing = [2, 1, 0, 1]
        taken = algos.take_nd(data, no_missing, fill_value=fill_value)
        assert (taken[[0, 1, 2, 3]] == data[no_missing]).all()
        assert taken.dtype == dtype

    def test_2d_fill_nonna(self, dtype_fill_out_dtype):
        """2-D take_nd with an explicit fill value along each axis."""
        dtype, fill_value, out_dtype = dtype_fill_out_dtype
        data = np.random.default_rng(2).integers(0, 2, (5, 3)).astype(dtype)

        with_missing = [2, 1, 0, -1]
        for axis in (0, 1):
            taken = algos.take_nd(data, with_missing, axis=axis, fill_value=fill_value)
            kept = taken.take([0, 1, 2], axis=axis)
            assert (kept == data.take([2, 1, 0], axis=axis)).all()
            assert (taken.take(3, axis=axis) == fill_value).all()
            assert taken.dtype == out_dtype

        no_missing = [2, 1, 0, 1]
        for axis in (0, 1):
            taken = algos.take_nd(data, no_missing, axis=axis, fill_value=fill_value)
            everything = taken.take([0, 1, 2, 3], axis=axis)
            assert (everything == data.take(no_missing, axis=axis)).all()
            assert taken.dtype == dtype

    def test_3d_fill_nonna(self, dtype_fill_out_dtype):
        """3-D take_nd with an explicit fill value along each axis."""
        dtype, fill_value, out_dtype = dtype_fill_out_dtype
        data = np.random.default_rng(2).integers(0, 2, (5, 4, 3)).astype(dtype)

        with_missing = [2, 1, 0, -1]
        for axis in (0, 1, 2):
            taken = algos.take_nd(data, with_missing, axis=axis, fill_value=fill_value)
            kept = taken.take([0, 1, 2], axis=axis)
            assert (kept == data.take([2, 1, 0], axis=axis)).all()
            assert (taken.take(3, axis=axis) == fill_value).all()
            assert taken.dtype == out_dtype

        no_missing = [2, 1, 0, 1]
        for axis in (0, 1, 2):
            taken = algos.take_nd(data, no_missing, axis=axis, fill_value=fill_value)
            everything = taken.take([0, 1, 2, 3], axis=axis)
            assert (everything == data.take(no_missing, axis=axis)).all()
            assert taken.dtype == dtype

    def test_1d_other_dtypes(self):
        """float32 input: the -1 position is expected to hold NaN."""
        values = np.random.default_rng(2).standard_normal(10).astype(np.float32)
        positions = [1, 2, 3, -1]

        taken = algos.take_nd(values, positions)

        want = values.take(positions)
        want[-1] = np.nan
        tm.assert_almost_equal(taken, want)

    def test_2d_other_dtypes(self):
        """2-D float32 input: the -1 row/column is expected to hold NaN."""
        values = np.random.default_rng(2).standard_normal((10, 5)).astype(np.float32)
        positions = [1, 2, 3, -1]

        # along rows
        taken = algos.take_nd(values, positions, axis=0)
        want = values.take(positions, axis=0)
        want[-1] = np.nan
        tm.assert_almost_equal(taken, want)

        # along columns
        taken = algos.take_nd(values, positions, axis=1)
        want = values.take(positions, axis=1)
        want[:, -1] = np.nan
        tm.assert_almost_equal(taken, want)

    def test_1d_bool(self):
        """bool input matches ndarray.take without -1 and is object dtype with -1."""
        flags = np.array([0, 1, 0], dtype=bool)

        tm.assert_numpy_array_equal(
            algos.take_nd(flags, [0, 2, 2, 1]), flags.take([0, 2, 2, 1])
        )

        with_missing = algos.take_nd(flags, [0, 2, -1])
        assert with_missing.dtype == np.object_

    def test_2d_bool(self):
        """2-D bool input along both axes, plus the object-dtype result with -1."""
        flags = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool)
        positions = [0, 2, 2, 1]

        # default axis
        tm.assert_numpy_array_equal(
            algos.take_nd(flags, positions), flags.take(positions, axis=0)
        )

        tm.assert_numpy_array_equal(
            algos.take_nd(flags, positions, axis=1), flags.take(positions, axis=1)
        )

        with_missing = algos.take_nd(flags, [0, 2, -1])
        assert with_missing.dtype == np.object_

    def test_2d_float32(self):
        """2-D float32 input with two -1 entries in the indexer."""
        values = np.random.default_rng(2).standard_normal((4, 3)).astype(np.float32)
        positions = [0, 2, -1, 1, -1]
        missing_at = [2, 4]

        # along rows
        taken = algos.take_nd(values, positions, axis=0)
        want = values.take(positions, axis=0)
        want[missing_at, :] = np.nan
        tm.assert_almost_equal(taken, want)

        # along columns
        taken = algos.take_nd(values, positions, axis=1)
        want = values.take(positions, axis=1)
        want[:, missing_at] = np.nan
        tm.assert_almost_equal(taken, want)

    def test_2d_datetime64(self):
        """datetime64[ns] input: default fill is iNaT, or a given datetime."""
        # 2005/01/01 - 2006/01/01
        raw = (
            np.random.default_rng(2).integers(11_045_376, 11_360_736, (5, 3))
            * 100_000_000_000
        )
        stamps = raw.view(dtype="datetime64[ns]")
        positions = [0, 2, -1, 1, -1]
        fill = datetime(2007, 1, 1)

        # along rows, default fill
        taken = algos.take_nd(stamps, positions, axis=0)
        want = stamps.take(positions, axis=0)
        want.view(np.int64)[[2, 4], :] = iNaT
        tm.assert_almost_equal(taken, want)

        # along rows, explicit fill
        taken = algos.take_nd(stamps, positions, axis=0, fill_value=fill)
        want = stamps.take(positions, axis=0)
        want[[2, 4], :] = datetime(2007, 1, 1)
        tm.assert_almost_equal(taken, want)

        # along columns, default fill
        taken = algos.take_nd(stamps, positions, axis=1)
        want = stamps.take(positions, axis=1)
        want.view(np.int64)[:, [2, 4]] = iNaT
        tm.assert_almost_equal(taken, want)

        # along columns, explicit fill
        taken = algos.take_nd(stamps, positions, axis=1, fill_value=fill)
        want = stamps.take(positions, axis=1)
        want[:, [2, 4]] = datetime(2007, 1, 1)
        tm.assert_almost_equal(taken, want)

    def test_take_axis_0(self):
        """algos.take on rows: -1 is the last row unless allow_fill=True."""
        grid = np.arange(12).reshape(4, 3)

        tm.assert_numpy_array_equal(
            algos.take(grid, [0, -1]), np.array([[0, 1, 2], [9, 10, 11]])
        )

        # allow_fill=True
        filled = algos.take(grid, [0, -1], allow_fill=True, fill_value=0)
        tm.assert_numpy_array_equal(filled, np.array([[0, 1, 2], [0, 0, 0]]))

    def test_take_axis_1(self):
        """algos.take on columns, including bounds validation on that axis."""
        grid = np.arange(12).reshape(4, 3)

        tm.assert_numpy_array_equal(
            algos.take(grid, [0, -1], axis=1),
            np.array([[0, 2], [3, 5], [6, 8], [9, 11]]),
        )

        # allow_fill=True
        filled = algos.take(grid, [0, -1], axis=1, allow_fill=True, fill_value=0)
        tm.assert_numpy_array_equal(
            filled, np.array([[0, 0], [3, 0], [6, 0], [9, 0]])
        )

        # GH#26976 make sure we validate along the correct axis
        with pytest.raises(IndexError, match="indices are out-of-bounds"):
            algos.take(grid, [0, 3], axis=1, allow_fill=True, fill_value=0)

    def test_take_non_hashable_fill_value(self):
        """A list fill value raises for an int array but works for object dtype."""
        positions = np.array([1, -1])

        ints = np.array([1, 2, 3])
        with pytest.raises(ValueError, match="fill_value must be a scalar"):
            algos.take(ints, positions, allow_fill=True, fill_value=[1])

        # with object dtype it is allowed
        objects = np.array([1, 2, 3], dtype=object)
        taken = algos.take(objects, positions, allow_fill=True, fill_value=[1])
        tm.assert_numpy_array_equal(taken, np.array([2, [1]], dtype=object))
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
class TestExtensionTake:
    """Tests for ``algos.take`` (the take method found in pd.api.extensions)."""

    def test_bounds_check_large(self):
        """Indices past the end raise IndexError for both allow_fill settings."""
        pair = np.array([1, 2])

        with pytest.raises(IndexError, match="indices are out-of-bounds"):
            algos.take(pair, [2, 3], allow_fill=True)

        with pytest.raises(
            IndexError, match="index 2 is out of bounds for( axis 0 with)? size 2"
        ):
            algos.take(pair, [2, 3], allow_fill=False)

    def test_bounds_check_small(self):
        """-2 is rejected when allow_fill=True but counts from the end otherwise."""
        triple = np.array([1, 2, 3], dtype=np.int64)
        positions = [0, -1, -2]

        with pytest.raises(
            ValueError,
            match=r"'indices' contains values less than allowed \(-2 < -1\)",
        ):
            algos.take(triple, positions, allow_fill=True)

        tm.assert_numpy_array_equal(
            algos.take(triple, positions), np.array([1, 3, 2], dtype=np.int64)
        )

    @pytest.mark.parametrize("allow_fill", [True, False])
    def test_take_empty(self, allow_fill):
        """An empty take from an empty array works; a non-empty take raises."""
        empty = np.array([], dtype=np.int64)

        # empty take is ok
        taken = algos.take(empty, [], allow_fill=allow_fill)
        tm.assert_numpy_array_equal(empty, taken)

        accepted_messages = (
            "cannot do a non-empty take from an empty axes.",
            "indices are out-of-bounds",
        )
        with pytest.raises(IndexError, match="|".join(accepted_messages)):
            algos.take(empty, [0], allow_fill=allow_fill)

    def test_take_na_empty(self):
        """Taking only -1 from an empty array yields just the fill value."""
        taken = algos.take(np.array([]), [-1, -1], allow_fill=True, fill_value=0.0)
        tm.assert_numpy_array_equal(taken, np.array([0.0, 0.0]))

    def test_take_coerces_list(self):
        """A plain list input still works but emits a FutureWarning."""
        plain_list = [1, 2, 3]
        with tm.assert_produces_warning(
            FutureWarning, match="take accepting non-standard inputs is deprecated"
        ):
            taken = algos.take(plain_list, [0, 0])
        tm.assert_numpy_array_equal(taken, np.array([1, 1]))
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def __getattr__(key: str):
    """
    Resolve a handful of public names on first attribute access.

    The imports are done inside the function (lazily) to avoid circular
    import errors. Any other name raises AttributeError.
    """
    if key in ("hash_array", "hash_pandas_object"):
        from pandas.core.util import hashing

        return getattr(hashing, key)

    if key in ("Appender", "Substitution", "cache_readonly"):
        from pandas.util import _decorators

        return getattr(_decorators, key)

    raise AttributeError(f"module 'pandas.util' has no attribute '{key}'")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def capitalize_first_letter(s):
    """Return ``s`` with its first character upper-cased; the rest is unchanged."""
    head, tail = s[:1], s[1:]
    return head.upper() + tail
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_decorators.py
ADDED
|
@@ -0,0 +1,508 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from functools import wraps
|
| 4 |
+
import inspect
|
| 5 |
+
from textwrap import dedent
|
| 6 |
+
from typing import (
|
| 7 |
+
TYPE_CHECKING,
|
| 8 |
+
Any,
|
| 9 |
+
Callable,
|
| 10 |
+
cast,
|
| 11 |
+
)
|
| 12 |
+
import warnings
|
| 13 |
+
|
| 14 |
+
from pandas._libs.properties import cache_readonly
|
| 15 |
+
from pandas._typing import (
|
| 16 |
+
F,
|
| 17 |
+
T,
|
| 18 |
+
)
|
| 19 |
+
from pandas.util._exceptions import find_stack_level
|
| 20 |
+
|
| 21 |
+
if TYPE_CHECKING:
|
| 22 |
+
from collections.abc import Mapping
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def deprecate(
    name: str,
    alternative: Callable[..., Any],
    version: str,
    alt_name: str | None = None,
    klass: type[Warning] | None = None,
    stacklevel: int = 2,
    msg: str | None = None,
) -> Callable[[F], F]:
    """
    Return a new function that emits a deprecation warning on use.

    To use this method for a deprecated function, another function
    `alternative` with the same signature must exist. The deprecated
    function will emit a deprecation warning, and in the docstring
    it will contain the deprecation directive with the provided version
    so it can be detected for future removal.

    Parameters
    ----------
    name : str
        Name of function to deprecate.
    alternative : func
        Function to use instead.
    version : str
        Version of pandas in which the method has been deprecated.
    alt_name : str, optional
        Name to use in preference of alternative.__name__.
    klass : Warning, default FutureWarning
        Warning category passed to ``warnings.warn``.
    stacklevel : int, default 2
        ``stacklevel`` passed to ``warnings.warn``.
    msg : str
        The message to display in the warning.
        Default is '{name} is deprecated, use {alt_name} instead.'

    Returns
    -------
    callable
        Wrapper that warns and then forwards all arguments to `alternative`.

    Raises
    ------
    AssertionError
        If `alternative` has a non-empty docstring that is not laid out as:
        opening quotes on their own line, a one-line summary, a blank
        line, then the remainder.
    """
    alt_name = alt_name or alternative.__name__
    klass = klass or FutureWarning
    warning_msg = msg or f"{name} is deprecated, use {alt_name} instead."

    @wraps(alternative)
    def wrapper(*args, **kwargs) -> Callable[..., Any]:
        warnings.warn(warning_msg, klass, stacklevel=stacklevel)
        return alternative(*args, **kwargs)

    # adding deprecated directive to the docstring
    msg = msg or f"Use `{alt_name}` instead."
    doc_error_msg = (
        "deprecate needs a correctly formatted docstring in "
        "the target function (should have a one liner short "
        "summary, and opening quotes should be in their own "
        f"line). Found:\n{alternative.__doc__}"
    )

    # when python is running in optimized mode (i.e. `-OO`), docstrings are
    # removed, so we check that a docstring with correct formatting is used
    # but we allow empty docstrings
    if alternative.__doc__:
        if alternative.__doc__.count("\n") < 3:
            raise AssertionError(doc_error_msg)
        empty1, summary, empty2, doc_string = alternative.__doc__.split("\n", 3)
        # Each of the three conditions is an independent formatting error.
        # The line after the summary (``empty2``) is discarded below, so a
        # summary spilling onto it would silently lose text; whitespace-only
        # lines count as blank.
        if empty1 or empty2.strip() or not summary.strip():
            raise AssertionError(doc_error_msg)
        wrapper.__doc__ = dedent(
            f"""
        {summary.strip()}

        .. deprecated:: {version}
            {msg}

        {dedent(doc_string)}"""
        )
    # error: Incompatible return value type (got "Callable[[VarArg(Any), KwArg(Any)],
    # Callable[...,Any]]", expected "Callable[[F], F]")
    return wrapper  # type: ignore[return-value]
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def deprecate_kwarg(
    old_arg_name: str,
    new_arg_name: str | None,
    mapping: Mapping[Any, Any] | Callable[[Any], Any] | None = None,
    stacklevel: int = 2,
) -> Callable[[F], F]:
    """
    Decorator to deprecate a keyword argument of a function.

    Parameters
    ----------
    old_arg_name : str
        Name of the deprecated keyword argument.
    new_arg_name : str or None
        Name of the keyword argument to use instead. With None, a warning
        is emitted and the call still receives ``old_arg_name``.
    mapping : dict or callable, optional
        Translates a value given for the old argument into the value passed
        to the new argument. A callable must do its own value checking;
        values not found in a dict are forwarded unchanged.
    stacklevel : int, default 2
        ``stacklevel`` passed to ``warnings.warn``.

    Raises
    ------
    TypeError
        At decoration time if ``mapping`` is neither None, an object with
        a ``get`` attribute, nor callable; at call time if both the old
        and the new keyword are given a value other than None.

    Notes
    -----
    A value of None for the old keyword is treated as "not passed": no
    warning is emitted and the keyword is not forwarded.

    Examples
    --------
    The following deprecates 'cols', using 'columns' instead

    >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns')
    ... def f(columns=''):
    ...     print(columns)
    ...
    >>> f(columns='should work ok')
    should work ok

    >>> f(cols='should raise warning')  # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated, use 'columns' instead.
      warnings.warn(msg, FutureWarning)
    should raise warning

    >>> f(cols='should error', columns='cannot pass both')  # doctest: +SKIP
    TypeError: Can only specify 'cols' or 'columns', not both.

    >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False})
    ... def f(new=False):
    ...     print('yes!' if new else 'no!')
    ...
    >>> f(old='yes')  # doctest: +SKIP
    FutureWarning: the old='yes' keyword is deprecated, use new=True instead.
      warnings.warn(msg, FutureWarning)
    yes!

    To raise a warning that a keyword will be removed entirely in the future

    >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None)
    ... def f(cols='', another_param=''):
    ...     print(cols)
    ...
    >>> f(cols='should raise warning')  # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated and will be removed in a
    future version. Please take steps to stop the use of 'cols'
    should raise warning
    >>> f(another_param='should not raise warning')  # doctest: +SKIP
    should not raise warning
    """
    mapping_is_usable = (
        mapping is None or hasattr(mapping, "get") or callable(mapping)
    )
    if not mapping_is_usable:
        raise TypeError(
            "mapping from old to new argument values must be dict or callable!"
        )

    def _deprecate_kwarg(func: F) -> F:
        @wraps(func)
        def wrapper(*args, **kwargs) -> Callable[..., Any]:
            given = kwargs.pop(old_arg_name, None)

            # Old keyword absent (or None): plain pass-through.
            if given is None:
                return func(*args, **kwargs)

            # No replacement keyword: warn, then hand the value back as-is.
            if new_arg_name is None:
                removal_msg = (
                    f"the {old_arg_name!r} keyword is deprecated and "
                    "will be removed in a future version. Please take "
                    f"steps to stop the use of {old_arg_name!r}"
                )
                warnings.warn(removal_msg, FutureWarning, stacklevel=stacklevel)
                kwargs[old_arg_name] = given
                return func(*args, **kwargs)

            if mapping is None:
                translated = given
                rename_msg = (
                    f"the {old_arg_name!r} keyword is deprecated, "
                    f"use {new_arg_name!r} instead."
                )
            else:
                if callable(mapping):
                    translated = mapping(given)
                else:
                    translated = mapping.get(given, given)
                rename_msg = (
                    f"the {old_arg_name}={given!r} keyword is "
                    "deprecated, use "
                    f"{new_arg_name}={translated!r} instead."
                )

            warnings.warn(rename_msg, FutureWarning, stacklevel=stacklevel)

            if kwargs.get(new_arg_name) is not None:
                raise TypeError(
                    f"Can only specify {old_arg_name!r} "
                    f"or {new_arg_name!r}, not both."
                )
            kwargs[new_arg_name] = translated
            return func(*args, **kwargs)

        return cast(F, wrapper)

    return _deprecate_kwarg
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def _format_argument_list(allow_args: list[str]) -> str:
    """
    Describe the still-positional arguments for a warning message.

    Used by `deprecate_nonkeyword_arguments` to build the part of its
    warning that names the arguments allowed to stay positional.

    Parameters
    ----------
    allow_args : list of str
        Names of the arguments that may still be passed positionally.
        ``"self"`` is left out of the description. The list is not modified.

    Returns
    -------
    str
        The fragment to insert into the warning message. It starts with a
        space when non-empty and is the empty string when no argument
        (other than ``"self"``) is left.

    Examples
    --------
    `_format_argument_list([])` -> ''
    `_format_argument_list(['a'])` -> " except for the argument 'a'"
    `_format_argument_list(['a', 'b'])` -> " except for the arguments 'a' and 'b'"
    `_format_argument_list(['a', 'b', 'c'])` ->
        " except for the arguments 'a', 'b' and 'c'"
    """
    # Work on a copy: the list belongs to the caller (it is the
    # ``allowed_args`` given to the decorator) and must not lose "self".
    names = list(allow_args)
    if "self" in names:
        names.remove("self")

    if not names:
        return ""
    if len(names) == 1:
        return f" except for the argument '{names[0]}'"

    *leading, last = names
    quoted = ", ".join(f"'{x}'" for x in leading)
    return f" except for the arguments {quoted} and '{last}'"
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
def future_version_msg(version: str | None) -> str:
    """
    Return the opening words of a deprecation message.

    Names the pandas version in which the change takes effect, or falls
    back to a generic "future version" wording when `version` is None.
    """
    if version is not None:
        return f"Starting with pandas version {version}"
    return "In a future version of pandas"
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def deprecate_nonkeyword_arguments(
    version: str | None,
    allowed_args: list[str] | None = None,
    name: str | None = None,
) -> Callable[[F], F]:
    """
    Decorator that warns when too many arguments are passed positionally.

    Parameters
    ----------
    version : str or None
        The version in which positional arguments will become
        keyword-only. With None the warning does not mention a
        particular version.

    allowed_args : list, optional
        Names of the leading arguments of the decorated function that may
        still be given positionally. With None, every positional argument
        without a default value is allowed.

    name : str, optional
        Function name to show in the warning message. With None the
        qualified name of the decorated function is used.
    """

    def decorate(func):
        signature = inspect.signature(func)
        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )

        if allowed_args is None:
            # default: every positional parameter that has no default value
            allow_args = [
                param.name
                for param in signature.parameters.values()
                if param.kind in positional_kinds and param.default is param.empty
            ]
        else:
            allow_args = allowed_args

        def _advertised(param):
            # parameters that will stop being positional are shown keyword-only
            if param.kind in positional_kinds and param.name not in allow_args:
                return param.replace(kind=param.KEYWORD_ONLY)
            return param

        # stable sort by kind keeps the advertised signature well-formed
        new_params = sorted(
            (_advertised(param) for param in signature.parameters.values()),
            key=lambda param: param.kind,
        )
        new_sig = signature.replace(parameters=new_params)

        # fixed at decoration time
        max_positional = len(allow_args)
        # ``{arguments}`` survives the f-string and is filled in per warning
        msg = (
            f"{future_version_msg(version)} all arguments of "
            f"{name or func.__qualname__}{{arguments}} will be keyword-only."
        )

        @wraps(func)
        def wrapper(*args, **kwargs):
            if len(args) > max_positional:
                warnings.warn(
                    msg.format(arguments=_format_argument_list(allow_args)),
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            return func(*args, **kwargs)

        # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
        # attribute "__signature__"
        wrapper.__signature__ = new_sig  # type: ignore[attr-defined]
        return wrapper

    return decorate
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def doc(*docstrings: None | str | Callable, **params) -> Callable[[F], F]:
    """
    Decorator that assembles a docstring from templates and formats it.

    The decorated callable's own docstring (dedented) comes first, followed
    by each entry of `docstrings` in order. String entries are treated as
    templates and formatted with `params`; callable entries contribute
    their dedented ``__doc__`` unformatted.

    The unformatted components are stored on the decorated callable as
    ``_docstring_components`` so that another ``doc`` call can reuse them:
    strings are kept as strings and callables are kept as callables.

    Parameters
    ----------
    *docstrings : None, str, or callable
        The strings / docstring templates / documented callables to append,
        in order, after the decorated callable's own docstring. None
        entries are skipped.
    **params
        The values used to format the string templates.
    """

    def decorator(decorated: F) -> F:
        # gather the raw (unformatted) pieces first
        components: list[str | Callable] = []
        own_doc = decorated.__doc__
        if own_doc:
            components.append(dedent(own_doc))

        for extra in docstrings:
            if extra is None:
                continue
            if hasattr(extra, "_docstring_components"):
                # already processed by ``doc``: reuse its raw pieces
                components.extend(
                    extra._docstring_components  # pyright: ignore[reportGeneralTypeIssues]
                )
            elif isinstance(extra, str) or extra.__doc__:
                components.append(extra)

        # only string components are templates; skip formatting without params
        rendered = [
            piece.format(**params) if isinstance(piece, str) and params else piece
            for piece in components
        ]

        decorated.__doc__ = "".join(
            piece if isinstance(piece, str) else dedent(piece.__doc__ or "")
            for piece in rendered
        )

        # error: "F" has no attribute "_docstring_components"
        decorated._docstring_components = components  # type: ignore[attr-defined]
        return decorated

    return decorator
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
# Substitution and Appender are derived from matplotlib.docstring (1.1.0)
|
| 405 |
+
# module https://matplotlib.org/users/license.html
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
class Substitution:
    """
    Decorator that applies ``%``-style substitution to a docstring.

    A function without a docstring (for example when the interpreter runs
    with -OO) is passed through unchanged.

    Usage: build a Substitution from either positional values or keyword
    values, then decorate a function with it, e.g.

    sub_author_name = Substitution(author='Jason')

    @sub_author_name
    def some_function(x):
        "%(author)s wrote this function"

    # some_function.__doc__ is now "Jason wrote this function"

    Positional values work the same way.

    sub_first_last_names = Substitution('Edgar Allen', 'Poe')

    @sub_first_last_names
    def some_function(x):
        "%s %s wrote the Raven"
    """

    def __init__(self, *args, **kwargs) -> None:
        if args and kwargs:
            raise AssertionError("Only positional or keyword args are allowed")

        # a tuple for positional use, a dict otherwise (also when both are empty)
        self.params = args if args else kwargs

    def __call__(self, func: F) -> F:
        template = func.__doc__
        # a missing or empty docstring is written back untouched
        func.__doc__ = template % self.params if template else template
        return func

    def update(self, *args, **kwargs) -> None:
        """
        Merge the supplied values into self.params.

        Has no effect when the parameters were given positionally.
        """
        if not isinstance(self.params, dict):
            return
        self.params.update(*args, **kwargs)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
class Appender:
    """
    Function decorator that appends an addendum to the target's docstring.

    A function whose ``__doc__`` is None (for example when the interpreter
    runs with -OO) is handled by treating the docstring as empty.

    Usage: build an Appender from the text to add. The optional 'join'
    string is placed between the original docstring and the addendum, e.g.

    add_copyright = Appender("Copyright (c) 2009", join='\n')

    @add_copyright
    def my_dog(has='fleas'):
        "This docstring will have a copyright below"
        pass
    """

    addendum: str | None

    def __init__(self, addendum: str | None, join: str = "", indents: int = 0) -> None:
        # the addendum is only run through ``indent`` when asked for
        self.addendum = indent(addendum, indents=indents) if indents > 0 else addendum
        self.join = join

    def __call__(self, func: T) -> T:
        # normalise missing pieces to empty strings before joining
        if not func.__doc__:
            func.__doc__ = ""
        if not self.addendum:
            self.addendum = ""
        combined = self.join.join([func.__doc__, self.addendum])
        func.__doc__ = dedent(combined)
        return func
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
def indent(text: str | None, indents: int = 1) -> str:
    """
    Indent every line of `text` after the first one.

    Parameters
    ----------
    text : str or None
        Text to indent. Anything empty or not a string yields "".
    indents : int, default 1
        Number of indentation units inserted after each newline.

    Returns
    -------
    str
        The text with each newline followed by the indentation.
    """
    if not isinstance(text, str) or not text:
        return ""
    # NOTE(review): the unit is a single space as written here; whitespace
    # inside this literal may have been collapsed when the file was
    # extracted - confirm against the upstream module.
    separator = "\n" + " " * indents
    return separator.join(text.split("\n"))
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
# Names exported by ``from <this module> import *``.
__all__ = [
    "Appender",
    "cache_readonly",
    "deprecate",
    "deprecate_kwarg",
    "deprecate_nonkeyword_arguments",
    "doc",
    "future_version_msg",
    "Substitution",
]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_doctools.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import TYPE_CHECKING
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
|
| 9 |
+
if TYPE_CHECKING:
|
| 10 |
+
from collections.abc import Iterable
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TablePlotter:
    """
    Draw input DataFrames next to the result of an operation on them.

    The tables are laid out vertically or horizontally in one matplotlib
    figure. Used in merging.rst
    """

    def __init__(
        self,
        cell_width: float = 0.37,
        cell_height: float = 0.25,
        font_size: float = 7.5,
    ) -> None:
        self.cell_width = cell_width
        self.cell_height = cell_height
        self.font_size = font_size

    def _shape(self, df: pd.DataFrame) -> tuple[int, int]:
        """
        Return the (rows, columns) of the drawn table.

        The column header levels count as rows and the index levels count
        as columns.
        """
        n_rows, n_cols = df.shape
        return n_rows + df.columns.nlevels, n_cols + df.index.nlevels

    def _get_cells(self, left, right, vertical) -> tuple[int, int]:
        """
        Return the number of (horizontal, vertical) cells the figure needs.
        """
        left_shapes = [self._shape(df) for df in left]
        right_rows, right_cols = self._shape(right)
        if vertical:
            # inputs are stacked, the result sits beside the stack
            vcells = max(sum(rows for rows, _ in left_shapes), right_rows)
            hcells = max(cols for _, cols in left_shapes) + right_cols
        else:
            # everything sits in a single row
            vcells = max([rows for rows, _ in left_shapes] + [right_rows])
            hcells = sum([cols for _, cols in left_shapes] + [right_cols])
        return hcells, vcells

    def plot(self, left, right, labels: Iterable[str] = (), vertical: bool = True):
        """
        Plot left / right DataFrames in the requested layout.

        Parameters
        ----------
        left : list of DataFrames before operation is applied
        right : DataFrame of operation result
        labels : list of str to be drawn as titles of left DataFrames
        vertical : bool, default True
            If True, use vertical layout. If False, use horizontal layout.
        """
        from matplotlib import gridspec
        import matplotlib.pyplot as plt

        frames = left if isinstance(left, list) else [left]
        frames = [self._conv(df) for df in frames]
        result = self._conv(right)

        hcells, vcells = self._get_cells(frames, result, vertical)

        # both layouts use the same figure size
        figsize = self.cell_width * hcells, self.cell_height * vcells
        fig = plt.figure(figsize=figsize)

        if vertical:
            grid = gridspec.GridSpec(len(frames), hcells)
            # left: one grid row per input, all sharing the widest column span
            max_left_cols = max(self._shape(df)[1] for df in frames)
            max_left_rows = max(self._shape(df)[0] for df in frames)
            for row, (frame, label) in enumerate(zip(frames, labels)):
                ax = fig.add_subplot(grid[row, 0:max_left_cols])
                self._make_table(ax, frame, title=label, height=1.0 / max_left_rows)
            # right: the result spans every grid row
            ax = plt.subplot(grid[:, max_left_cols:])
            self._make_table(ax, result, title="Result", height=1.05 / vcells)
            fig.subplots_adjust(top=0.9, bottom=0.05, left=0.05, right=0.95)
        else:
            max_rows = max(self._shape(df)[0] for df in frames + [result])
            height = 1.0 / np.max(max_rows)
            grid = gridspec.GridSpec(1, hcells)
            # left: inputs are placed one after another along the single row
            offset = 0
            for frame, label in zip(frames, labels):
                width = self._shape(frame)[1]
                ax = fig.add_subplot(grid[0, offset : offset + width])
                self._make_table(ax, frame, title=label, height=height)
                offset += width
            # right: the result takes the remaining columns
            ax = plt.subplot(grid[0, offset:])
            self._make_table(ax, result, title="Result", height=height)
            fig.subplots_adjust(top=0.85, bottom=0.05, left=0.05, right=0.95)

        return fig

    def _conv(self, data):
        """
        Convert an input into a frame suitable for table output.

        A Series becomes a one-column DataFrame (with an empty column name
        when the Series is unnamed); missing values are replaced by "NaN".
        """
        if isinstance(data, pd.Series):
            data = data.to_frame(name="") if data.name is None else data.to_frame()
        return data.fillna("NaN")

    def _insert_index(self, data):
        """
        Return a copy of `data` with index (and extra header) levels as cells.
        """
        # insert is destructive, so work on a copy
        data = data.copy()
        n_index_levels = data.index.nlevels
        if n_index_levels == 1:
            data.insert(0, "Index", data.index)
        else:
            for level in range(n_index_levels):
                data.insert(level, f"Index{level}", data.index._get_level_values(level))

        n_column_levels = data.columns.nlevels
        if n_column_levels <= 1:
            return data

        # keep the first header level as the column labels and turn the
        # remaining levels into leading rows
        top_level = data.columns._get_level_values(0)
        lower_levels = [
            data.columns._get_level_values(level)._values
            for level in range(1, n_column_levels)
        ]
        header_rows = pd.DataFrame(lower_levels)
        data.columns = header_rows.columns
        data = pd.concat([header_rows, data])
        data.columns = top_level
        return data

    def _make_table(self, ax, df, title: str, height: float | None = None) -> None:
        """
        Draw `df` as a table on `ax`; hide the axes when `df` is None.
        """
        if df is None:
            ax.set_visible(False)
            return

        from pandas import plotting

        idx_nlevels = df.index.nlevels
        col_nlevels = df.columns.nlevels
        # the level counts must be read before the index is turned into
        # columns; they are needed for the colouring below
        df = self._insert_index(df)
        table = plotting.table(ax, df, loc=9)
        table.set_fontsize(self.font_size)

        if height is None:
            height = 1.0 / (len(df) + 1)

        for (r, c), cell in table.properties()["celld"].items():
            in_header = r < col_nlevels
            in_index = c < idx_nlevels
            if c == -1 or (in_header and in_index):
                cell.set_visible(False)
            elif in_header or in_index:
                cell.set_facecolor("#AAAAAA")
            cell.set_height(height)

        ax.set_title(title, size=self.font_size)
        ax.axis("off")
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def main() -> None:
    """
    Show three demo figures drawn with TablePlotter.

    The figures are a vertical layout of a row-wise concat, a horizontal
    layout of a column-wise concat, and a frame with MultiIndex rows and
    columns. Each figure is displayed with ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    p = TablePlotter()

    df1 = pd.DataFrame({"A": [10, 11, 12], "B": [20, 21, 22], "C": [30, 31, 32]})
    df2 = pd.DataFrame({"A": [10, 12], "C": [30, 32]})

    p.plot([df1, df2], pd.concat([df1, df2]), labels=["df1", "df2"], vertical=True)
    plt.show()

    df3 = pd.DataFrame({"X": [10, 12], "Z": [30, 32]})

    # the second input here is df3, so title it "df3" (it was labelled "df2")
    p.plot(
        [df1, df3], pd.concat([df1, df3], axis=1), labels=["df1", "df3"], vertical=False
    )
    plt.show()

    idx = pd.MultiIndex.from_tuples(
        [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")]
    )
    column = pd.MultiIndex.from_tuples([(1, "A"), (1, "B")])
    df3 = pd.DataFrame({"v1": [1, 2, 3, 4, 5, 6], "v2": [5, 6, 7, 8, 9, 10]}, index=idx)
    df3.columns = column
    p.plot(df3, df3, labels=["df3"])
    plt.show()
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
if __name__ == "__main__":
|
| 202 |
+
main()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_exceptions.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import contextlib
|
| 4 |
+
import inspect
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
from typing import TYPE_CHECKING
|
| 8 |
+
import warnings
|
| 9 |
+
|
| 10 |
+
if TYPE_CHECKING:
|
| 11 |
+
from collections.abc import Generator
|
| 12 |
+
from types import FrameType
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@contextlib.contextmanager
def rewrite_exception(old_name: str, new_name: str) -> Generator[None, None, None]:
    """
    Replace `old_name` with `new_name` in the message of a raised exception.

    Only the first element of the exception's ``args`` is rewritten (after
    conversion to str); the exception is then re-raised. Exceptions
    without args are re-raised untouched.
    """
    try:
        yield
    except Exception as err:
        if err.args:
            first, *rest = err.args
            rewritten = str(first).replace(old_name, new_name)
            err.args = (rewritten, *rest)
        raise
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def find_stack_level() -> int:
    """
    Count the leading stack frames that live inside the pandas package.

    Starting at this function's own frame, frames are counted while their
    file is located under the pandas package directory but not under its
    ``tests`` directory. The count stops at the first frame outside.
    """

    import pandas as pd

    pkg_dir = os.path.dirname(pd.__file__)
    test_dir = os.path.join(pkg_dir, "tests")

    # walking f_back is much cheaper than inspect.stack(), see
    # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
    frame: FrameType | None = inspect.currentframe()
    depth = 0
    try:
        while frame:
            path = inspect.getfile(frame)
            if not path.startswith(pkg_dir) or path.startswith(test_dir):
                break
            frame = frame.f_back
            depth += 1
    finally:
        # drop the frame reference, see the note in
        # https://docs.python.org/3/library/inspect.html#inspect.Traceback
        del frame
    return depth
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
@contextlib.contextmanager
def rewrite_warning(
    target_message: str,
    target_category: type[Warning],
    new_message: str,
    new_category: type[Warning] | None = None,
) -> Generator[None, None, None]:
    """
    Re-emit warnings raised in the block, rewriting the matching ones.

    Every warning raised inside the ``with`` block is recorded and emitted
    again once the block finishes. Warnings of exactly `target_category`
    whose message matches `target_message` are replaced; all others are
    emitted unchanged.

    Parameters
    ----------
    target_message : str
        Regular expression searched for in the warning message.
    target_category : Warning
        Warning type to match.
    new_message : str
        New warning message to emit.
    new_category : Warning or None, default None
        New warning type to emit. When None, will be the same as target_category.
    """
    replacement_category = target_category if new_category is None else new_category

    with warnings.catch_warnings(record=True) as captured:
        yield

    if not captured:
        return

    pattern = re.compile(target_message)
    for item in captured:
        matches = item.category is target_category and pattern.search(str(item.message))
        if matches:
            category = replacement_category
            message: Warning | str = new_message
        else:
            category, message = item.category, item.message
        # re-emit at the location where the warning was originally raised
        warnings.warn_explicit(
            message=message,
            category=category,
            filename=item.filename,
            lineno=item.lineno,
        )
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_print_versions.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import codecs
|
| 4 |
+
import json
|
| 5 |
+
import locale
|
| 6 |
+
import os
|
| 7 |
+
import platform
|
| 8 |
+
import struct
|
| 9 |
+
import sys
|
| 10 |
+
from typing import TYPE_CHECKING
|
| 11 |
+
|
| 12 |
+
if TYPE_CHECKING:
|
| 13 |
+
from pandas._typing import JSONSerializable
|
| 14 |
+
|
| 15 |
+
from pandas.compat._optional import (
|
| 16 |
+
VERSIONS,
|
| 17 |
+
get_version,
|
| 18 |
+
import_optional_dependency,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _get_commit_hash() -> str | None:
    """
    Return the git revision pandas was built from.

    The value comes from ``pandas._version_meson`` when that module can be
    imported, and otherwise from the vendored versioneer code, which
    handles git worktree correctly.
    """
    try:
        from pandas._version_meson import (  # pyright: ignore [reportMissingImports]
            __git_version__,
        )
    except ImportError:
        from pandas._version import get_versions

        return get_versions()["full-revisionid"]
    return __git_version__
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _get_sys_info() -> dict[str, JSONSerializable]:
    """
    Collect interpreter, platform and locale details.

    Returns
    -------
    dict
        JSON serializable mapping; the "LOCALE" entry is itself a dict
        with the keys "language-code" and "encoding".
    """
    uname_result = platform.uname()
    language_code, encoding = locale.getlocale()

    info: dict[str, JSONSerializable] = {
        "commit": _get_commit_hash(),
        "python": platform.python_version(),
        "python-bits": struct.calcsize("P") * 8,
    }
    # fields reported by platform.uname()
    info.update(
        {
            "OS": uname_result.system,
            "OS-release": uname_result.release,
            "Version": uname_result.version,
            "machine": uname_result.machine,
            "processor": uname_result.processor,
        }
    )
    info["byteorder"] = sys.byteorder
    # locale settings, from the environment and from the locale module
    info["LC_ALL"] = os.environ.get("LC_ALL")
    info["LANG"] = os.environ.get("LANG")
    info["LOCALE"] = {"language-code": language_code, "encoding": encoding}
    return info
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _get_dependency_info() -> dict[str, JSONSerializable]:
    """
    Look up the version of pandas and of each known dependency.

    Returns
    -------
    dict
        JSON serializable mapping of module name to its version, to None
        when the import yields nothing, or to "N/A" when the import raises.
    """
    deps = [
        "pandas",
        # required
        "numpy",
        "pytz",
        "dateutil",
        # install / build,
        "pip",
        "Cython",
        # docs
        "sphinx",
        # Other, not imported.
        "IPython",
        # Optional dependencies
        *VERSIONS,
    ]

    versions: dict[str, JSONSerializable] = {}
    for modname in deps:
        try:
            mod = import_optional_dependency(modname, errors="ignore")
        except Exception:
            # Dependency conflicts may cause a non ImportError
            versions[modname] = "N/A"
            continue
        versions[modname] = get_version(mod) if mod else None
    return versions
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def show_versions(as_json: str | bool = False) -> None:
    """
    Provide useful information, important for bug reports.

    It comprises info about the hosting operating system, the pandas
    version, and the versions of other related installed packages.

    Parameters
    ----------
    as_json : str or bool, default False
        * If False, outputs info in a human readable form to the console.
        * If str, it will be considered as a path to a file.
          Info will be written to that file in JSON format.
        * If True, outputs info in JSON format to the console.

    Examples
    --------
    >>> pd.show_versions()  # doctest: +SKIP
    Your output may look something like this:
    INSTALLED VERSIONS
    ------------------
    commit           : 37ea63d540fd27274cad6585082c91b1283f963d
    python           : 3.10.6.final.0
    python-bits      : 64
    OS               : Linux
    OS-release       : 5.10.102.1-microsoft-standard-WSL2
    Version          : #1 SMP Wed Mar 2 00:30:59 UTC 2022
    machine          : x86_64
    processor        : x86_64
    byteorder        : little
    LC_ALL           : None
    LANG             : en_GB.UTF-8
    LOCALE           : en_GB.UTF-8
    pandas           : 2.0.1
    numpy            : 1.24.3
    ...
    """
    sys_info = _get_sys_info()
    deps = _get_dependency_info()

    if as_json:
        j = {"system": sys_info, "dependencies": deps}

        if as_json is True:
            # the payload is a single string: write it in one call rather
            # than feeding it to writelines() character by character
            sys.stdout.write(json.dumps(j, indent=2))
        else:
            assert isinstance(as_json, str)  # needed for mypy
            # built-in open() replaces the deprecated codecs.open();
            # newline="" keeps "\n" untranslated, as the binary-mode
            # codecs stream did
            with open(as_json, "w", encoding="utf8", newline="") as f:
                json.dump(j, f, indent=2)

    else:
        assert isinstance(sys_info["LOCALE"], dict)  # needed for mypy
        language_code = sys_info["LOCALE"]["language-code"]
        encoding = sys_info["LOCALE"]["encoding"]
        # flatten the locale dict into "language.encoding" for display
        sys_info["LOCALE"] = f"{language_code}.{encoding}"

        # pad every key to the longest dependency name so the colons line up
        maxlen = max(len(x) for x in deps)
        print("\nINSTALLED VERSIONS")
        print("------------------")
        for k, v in sys_info.items():
            print(f"{k:<{maxlen}}: {v}")
        print("")
        for k, v in deps.items():
            print(f"{k:<{maxlen}}: {v}")
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_test_decorators.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This module provides decorator functions which can be applied to test objects
|
| 3 |
+
in order to skip those objects when certain conditions occur. A sample use case
|
| 4 |
+
is to detect if the platform is missing ``matplotlib``. If so, any test objects
|
| 5 |
+
which require ``matplotlib`` and are decorated with ``@td.skip_if_no("matplotlib")``
|
| 6 |
+
will be skipped by ``pytest`` during the execution of the test suite.
|
| 7 |
+
|
| 8 |
+
To illustrate, after importing this module:
|
| 9 |
+
|
| 10 |
+
import pandas.util._test_decorators as td
|
| 11 |
+
|
| 12 |
+
The decorators can be applied to classes:
|
| 13 |
+
|
| 14 |
+
@td.skip_if_no("package")
|
| 15 |
+
class Foo:
|
| 16 |
+
...
|
| 17 |
+
|
| 18 |
+
Or individual functions:
|
| 19 |
+
|
| 20 |
+
@td.skip_if_no("package")
|
| 21 |
+
def test_foo():
|
| 22 |
+
...
|
| 23 |
+
|
| 24 |
+
For more information, refer to the ``pytest`` documentation on ``skipif``.
|
| 25 |
+
"""
|
| 26 |
+
from __future__ import annotations
|
| 27 |
+
|
| 28 |
+
import locale
|
| 29 |
+
from typing import (
|
| 30 |
+
TYPE_CHECKING,
|
| 31 |
+
Callable,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
import pytest
|
| 35 |
+
|
| 36 |
+
from pandas._config import get_option
|
| 37 |
+
|
| 38 |
+
if TYPE_CHECKING:
|
| 39 |
+
from pandas._typing import F
|
| 40 |
+
|
| 41 |
+
from pandas._config.config import _get_option
|
| 42 |
+
|
| 43 |
+
from pandas.compat import (
|
| 44 |
+
IS64,
|
| 45 |
+
is_platform_windows,
|
| 46 |
+
)
|
| 47 |
+
from pandas.compat._optional import import_optional_dependency
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def skip_if_installed(package: str) -> pytest.MarkDecorator:
    """
    Build a mark that skips a test when ``package`` is installed.

    Parameters
    ----------
    package : str
        The name of the package to probe for.

    Returns
    -------
    pytest.MarkDecorator
        A ``pytest.mark.skipif`` usable either as a test decorator or as a
        parametrization mark.
    """
    # The probe runs when this function is called, not when the test runs.
    is_installed = bool(import_optional_dependency(package, errors="ignore"))
    skip_reason = f"Skipping because {package} is installed."
    return pytest.mark.skipif(is_installed, reason=skip_reason)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def skip_if_no(package: str, min_version: str | None = None) -> pytest.MarkDecorator:
    """
    Build a mark that skips a test when a required package is unavailable.

    An attempt is made to import ``package`` and, when ``min_version`` is
    given, to check that the installed version satisfies it. The import is
    attempted when this function is called; the resulting boolean becomes the
    skip condition of the returned mark.

    The mark can be used as a decorator for a test class or applied to
    parameters in ``pytest.mark.parametrize`` calls or parametrized fixtures.
    Use ``pytest.importorskip`` if the imported module is needed later, or
    for test functions.

    If the import and version check are unsuccessful, the test function (or
    test case when used in conjunction with parametrization) is skipped.

    Parameters
    ----------
    package : str
        The name of the required package.
    min_version : str or None, default None
        Optional minimum version of the package.

    Returns
    -------
    pytest.MarkDecorator
        A ``pytest.mark.skipif`` usable either as a test decorator or as a
        parametrization mark.
    """
    reason = f"Could not import '{package}'"
    if min_version:
        reason = f"{reason} satisfying a min_version of {min_version}"

    module = import_optional_dependency(
        package, errors="ignore", min_version=min_version
    )
    return pytest.mark.skipif(not bool(module), reason=reason)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# Skip when pandas.compat.IS64 is false.
skip_if_32bit = pytest.mark.skipif(
    not IS64,
    reason="skipping for 32 bit",
)

# Skip when pandas.compat reports a Windows platform.
skip_if_windows = pytest.mark.skipif(
    is_platform_windows(),
    reason="Running on Windows",
)

# Skip unless the locale name reported at import time is exactly "en_US".
# The locale is read once for the condition and once for the reason text.
skip_if_not_us_locale = pytest.mark.skipif(
    locale.getlocale()[0] != "en_US",
    reason=f"Set local {locale.getlocale()[0]} is not en_US",
)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def parametrize_fixture_doc(*args) -> Callable[[F], F]:
    """
    Decorator factory that fills placeholders in a fixture's docstring.

    Intended for parametrized fixtures: the returned decorator replaces the
    positional placeholders ``{0}``, ``{1}``, ... in the decorated function's
    docstring with ``args`` (via ``str.format``) and returns the very same
    function object.

    Parameters
    ----------
    args : iterable
        Positional arguments substituted into the docstring.

    Returns
    -------
    function
        A decorator that formats the docstring of the function it receives
        in place and returns that function.
    """

    def documented_fixture(fixture):
        doc = fixture.__doc__
        # ``__doc__`` is None when the fixture has no docstring or when
        # Python runs with -OO (docstrings stripped); there is nothing to
        # format in that case, so leave the fixture untouched instead of
        # failing with AttributeError.
        if doc is not None:
            fixture.__doc__ = doc.format(*args)
        return fixture

    return documented_fixture
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def mark_array_manager_not_yet_implemented(request) -> None:
    """
    Apply an xfail mark to the test behind ``request``.

    Parameters
    ----------
    request
        Object exposing ``applymarker``; presumably the pytest ``request``
        fixture -- confirm against callers.
    """
    request.applymarker(
        pytest.mark.xfail(reason="Not yet implemented for ArrayManager")
    )
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
# All four conditions below are evaluated once, when this module is imported.

# xfail when the "mode.data_manager" option equals "array".
skip_array_manager_not_yet_implemented = pytest.mark.xfail(
    _get_option("mode.data_manager", silent=True) == "array",
    reason="Not yet implemented for ArrayManager",
)

# Skip (rather than xfail) under the same "array" data-manager condition.
skip_array_manager_invalid_test = pytest.mark.skipif(
    _get_option("mode.data_manager", silent=True) == "array",
    reason="Test that relies on BlockManager internals or specific behaviour",
)

# xfail when the "mode.copy_on_write" option is exactly True.
skip_copy_on_write_not_yet_implemented = pytest.mark.xfail(
    get_option("mode.copy_on_write") is True,
    reason="Not yet implemented/adapted for Copy-on-Write mode",
)

# Skip when the "mode.copy_on_write" option is exactly True.
skip_copy_on_write_invalid_test = pytest.mark.skipif(
    get_option("mode.copy_on_write") is True,
    reason="Test not valid for Copy-on-Write mode",
)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_tester.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Entrypoint for testing from the top-level namespace.
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
|
| 9 |
+
from pandas.compat._optional import import_optional_dependency
|
| 10 |
+
|
| 11 |
+
PKG = os.path.dirname(os.path.dirname(__file__))
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test(extra_args: list[str] | None = None, run_doctests: bool = False) -> None:
    """
    Run the pandas test suite using pytest.

    By default, runs with the marks -m "not slow and not network and not db"

    Parameters
    ----------
    extra_args : list[str], default None
        Extra marks to run the tests. When given, these replace the default
        marks. A single non-list value is wrapped in a list. The list passed
        in is never modified.
    run_doctests : bool, default False
        Whether to only run the Python and Cython doctests. When True the
        doctest options replace both the defaults and ``extra_args``. If you
        would like to run both doctests/regular tests, just append
        "--doctest-modules"/"--doctest-cython" to extra_args.

    Notes
    -----
    This function does not return: it ends by calling ``sys.exit`` with the
    value returned from ``pytest.main``.

    Examples
    --------
    >>> pd.test()  # doctest: +SKIP
    running: pytest...
    """
    pytest = import_optional_dependency("pytest")
    import_optional_dependency("hypothesis")

    if run_doctests:
        cmd = [
            "--doctest-modules",
            "--doctest-cython",
            f"--ignore={os.path.join(PKG, 'tests')}",
        ]
    elif extra_args:
        # Copy the list so that appending PKG below cannot mutate the
        # caller's object.
        cmd = list(extra_args) if isinstance(extra_args, list) else [extra_args]
    else:
        cmd = ["-m not slow and not network and not db"]

    cmd.append(PKG)
    joined = " ".join(cmd)
    print(f"running: pytest {joined}")
    sys.exit(pytest.main(cmd))
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
__all__ = ["test"]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_validators.py
ADDED
|
@@ -0,0 +1,456 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Module that contains many useful utilities
|
| 3 |
+
for validating data or function arguments
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
from collections.abc import (
|
| 8 |
+
Iterable,
|
| 9 |
+
Sequence,
|
| 10 |
+
)
|
| 11 |
+
from typing import (
|
| 12 |
+
TypeVar,
|
| 13 |
+
overload,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
import numpy as np
|
| 17 |
+
|
| 18 |
+
from pandas._libs import lib
|
| 19 |
+
|
| 20 |
+
from pandas.core.dtypes.common import (
|
| 21 |
+
is_bool,
|
| 22 |
+
is_integer,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
BoolishT = TypeVar("BoolishT", bool, int)
|
| 26 |
+
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
|
| 30 |
+
"""
|
| 31 |
+
Checks whether 'args' has length of at most 'compat_args'. Raises
|
| 32 |
+
a TypeError if that is not the case, similar to in Python when a
|
| 33 |
+
function is called with too many arguments.
|
| 34 |
+
"""
|
| 35 |
+
if max_fname_arg_count < 0:
|
| 36 |
+
raise ValueError("'max_fname_arg_count' must be non-negative")
|
| 37 |
+
|
| 38 |
+
if len(args) > len(compat_args):
|
| 39 |
+
max_arg_count = len(compat_args) + max_fname_arg_count
|
| 40 |
+
actual_arg_count = len(args) + max_fname_arg_count
|
| 41 |
+
argument = "argument" if max_arg_count == 1 else "arguments"
|
| 42 |
+
|
| 43 |
+
raise TypeError(
|
| 44 |
+
f"{fname}() takes at most {max_arg_count} {argument} "
|
| 45 |
+
f"({actual_arg_count} given)"
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
|
| 50 |
+
"""
|
| 51 |
+
Check that the keys in `arg_val_dict` are mapped to their
|
| 52 |
+
default values as specified in `compat_args`.
|
| 53 |
+
|
| 54 |
+
Note that this function is to be called only when it has been
|
| 55 |
+
checked that arg_val_dict.keys() is a subset of compat_args
|
| 56 |
+
"""
|
| 57 |
+
for key in arg_val_dict:
|
| 58 |
+
# try checking equality directly with '=' operator,
|
| 59 |
+
# as comparison may have been overridden for the left
|
| 60 |
+
# hand object
|
| 61 |
+
try:
|
| 62 |
+
v1 = arg_val_dict[key]
|
| 63 |
+
v2 = compat_args[key]
|
| 64 |
+
|
| 65 |
+
# check for None-ness otherwise we could end up
|
| 66 |
+
# comparing a numpy array vs None
|
| 67 |
+
if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
|
| 68 |
+
match = False
|
| 69 |
+
else:
|
| 70 |
+
match = v1 == v2
|
| 71 |
+
|
| 72 |
+
if not is_bool(match):
|
| 73 |
+
raise ValueError("'match' is not a boolean")
|
| 74 |
+
|
| 75 |
+
# could not compare them directly, so try comparison
|
| 76 |
+
# using the 'is' operator
|
| 77 |
+
except ValueError:
|
| 78 |
+
match = arg_val_dict[key] is compat_args[key]
|
| 79 |
+
|
| 80 |
+
if not match:
|
| 81 |
+
raise ValueError(
|
| 82 |
+
f"the '{key}' parameter is not supported in "
|
| 83 |
+
f"the pandas implementation of {fname}()"
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the ``*args`` passed to a function against ``compat_args``.

    Checks that ``args`` has at most ``len(compat_args)`` entries and that
    each entry equals the default of the parameter it lines up with.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        In order to accommodate buggy behaviour in some versions of `numpy`,
        where a signature displayed keyword arguments but then passed those
        arguments **positionally** internally when calling downstream
        implementations, a dict ensures that the original
        order of the keyword arguments is enforced.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the parameter name it lines up with so
    # the error message can name the unsupported parameter.
    positional_by_name = dict(zip(compat_args, args))
    _check_for_default_values(fname, positional_by_name, compat_args)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
|
| 129 |
+
"""
|
| 130 |
+
Checks whether 'kwargs' contains any keys that are not
|
| 131 |
+
in 'compat_args' and raises a TypeError if there is one.
|
| 132 |
+
"""
|
| 133 |
+
# set(dict) --> set of the dictionary's keys
|
| 134 |
+
diff = set(kwargs) - set(compat_args)
|
| 135 |
+
|
| 136 |
+
if diff:
|
| 137 |
+
bad_arg = next(iter(diff))
|
| 138 |
+
raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the ``**kwargs`` passed to a function against ``compat_args``.

    Checks that every key is a known parameter and that every value equals
    the default registered for that parameter.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args : dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError
        If `kwargs` contains keys not in `compat_args`
    ValueError
        If `kwargs` contains keys in `compat_args` that do not
        map to the default values specified in `compat_args`
    """
    # The default-value check runs on a shallow copy taken before the key
    # check.
    snapshot = kwargs.copy()
    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    args : tuple
        The `*args` parameter passed into a function
    kwargs : dict
        The `**kwargs` parameter passed into `fname`. It is not modified.
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError
        If `args` and `kwargs` together contain more values than there are
        `compat_args`, if a parameter is given both positionally and by
        keyword, or if `kwargs` contains keys not in `compat_args`
    ValueError
        If `args` or `kwargs` contain values that do not match the default
        value specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.
    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Validate a merged copy so the caller's ``kwargs`` dict is left as it
    # was passed in.
    merged = {**kwargs, **args_dict}
    validate_kwargs(fname, merged, compat_args)
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def validate_bool_kwarg(
    value: BoolishNoneT,
    arg_name: str,
    none_allowed: bool = True,
    int_allowed: bool = False,
) -> BoolishNoneT:
    """
    Check that ``value`` is acceptable as a boolean keyword argument.

    Parameters
    ----------
    value : bool
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether to consider None to be a valid boolean.
    int_allowed : bool, default False
        Whether to consider integer value to be a valid boolean.

    Returns
    -------
    value
        The same value as input.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )
    if acceptable:
        return value  # pyright: ignore[reportGeneralTypeIssues]

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the keyword arguments to 'fillna'.

    Exactly one of 'value' and 'method' must be given. A given 'method' is
    passed through ``clean_fill_method`` and the result is returned in its
    place.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object

    Raises
    ------
    ValueError
        If neither or both of 'value' and 'method' are given.
    TypeError
        If 'value' is a list or tuple while ``validate_scalar_dict_value``
        is True.
    """
    from pandas.core.missing import clean_fill_method

    if value is None:
        if method is None:
            raise ValueError("Must specify a fill 'value' or 'method'.")
        return value, clean_fill_method(method)

    if method is not None:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    if validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    Converts ``q`` to an ndarray and checks that the value (0-d input) or
    every element (otherwise) lies in the closed interval [0, 1].

    Parameters
    ----------
    q : float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError
        If percentiles are not in given interval([0, 1]).
    """
    q_arr = np.asarray(q)
    # Keep the message a plain constant: formatting a message is wasted
    # work in the common case where nothing is raised.
    msg = "percentiles should all be in the interval [0, 1]"

    if q_arr.ndim == 0:
        out_of_range = not 0 <= q_arr <= 1
    else:
        # Stops at the first offending element.
        out_of_range = any(not 0 <= qs <= 1 for qs in q_arr)

    if out_of_range:
        raise ValueError(msg)
    return q_arr
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A ``Sequence`` is validated element by element and returned as a new
    list; anything else is validated as a single value and returned as is.
    None is rejected and integers are accepted in both cases.
    """
    options = {"none_allowed": False, "int_allowed": True}

    if isinstance(ascending, Sequence):
        return [
            validate_bool_kwarg(item, "ascending", **options) for item in ascending
        ]
    return validate_bool_kwarg(ascending, "ascending", **options)
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Check that the `closed` argument is among [None, "left", "right"]

    Parameters
    ----------
    closed : {None, "left", "right"}

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    # None means closed on both sides.
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True
    raise ValueError("Closed has to be either 'left', 'right' or None")
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Check that the `inclusive` argument is among {"both", "neither", "left", "right"}.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }
    # Only strings are looked up; any other type (including None) is invalid.
    left_right_inclusive: tuple[bool, bool] | None = (
        flags_by_name.get(inclusive) if isinstance(inclusive, str) else None
    )

    if left_right_inclusive is None:
        raise ValueError(
            "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
        )
    return left_right_inclusive
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
def validate_insert_loc(loc: int, length: int) -> int:
    """
    Check that we have an integer between -length and length, inclusive.

    Standardize negative loc to within [0, length].

    The exceptions we raise on failure match np.insert.

    Raises
    ------
    TypeError
        If ``loc`` is not an integer.
    IndexError
        If ``loc`` falls outside [-length, length].
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    # A negative position counts back from ``length``.
    position = loc + length if loc < 0 else loc
    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return position  # pyright: ignore[reportGeneralTypeIssues]
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
def check_dtype_backend(dtype_backend) -> None:
    """
    Raise a ValueError unless ``dtype_backend`` is an accepted value.

    ``lib.no_default`` (argument not supplied), "numpy_nullable" and
    "pyarrow" are accepted; anything else is rejected.
    """
    if dtype_backend is lib.no_default:
        return
    if dtype_backend in ["numpy_nullable", "pyarrow"]:
        return
    raise ValueError(
        f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
        f"'pyarrow' are allowed.",
    )
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (28.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-312.pyc
ADDED
|
Binary file (12.8 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-312.pyc
ADDED
|
Binary file (12.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/__init__.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import dataclasses
|
| 3 |
+
import glob
|
| 4 |
+
import inspect
|
| 5 |
+
from os.path import basename, dirname, isfile, join
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from torch._export.db.case import (
|
| 9 |
+
_EXAMPLE_CASES,
|
| 10 |
+
_EXAMPLE_CONFLICT_CASES,
|
| 11 |
+
_EXAMPLE_REWRITE_CASES,
|
| 12 |
+
SupportLevel,
|
| 13 |
+
export_case,
|
| 14 |
+
ExportCase,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _collect_examples():
    """
    Import every sibling example module and register it as an export case.

    Each ``*.py`` file next to this one (except ``__init__.py``) is imported
    relative to this package. Module attributes whose names match a field of
    ``ExportCase`` are passed as keyword arguments to ``export_case``, and
    the resulting decorator is applied to the module's ``model`` attribute.
    """
    here = dirname(__file__)
    module_names = [
        basename(path)[:-3]  # drop the ".py" suffix
        for path in glob.glob(join(here, "*.py"))
        if isfile(path) and not path.endswith("__init__.py")
    ]

    case_fields = {field.name for field in dataclasses.fields(ExportCase)}
    for module_name in module_names:
        # level=1 makes this a relative import from the current package.
        module = __import__(module_name, globals(), locals(), [], 1)
        overrides = {
            name: getattr(module, name) for name in dir(module) if name in case_fields
        }
        export_case(**overrides)(module.model)
|
| 29 |
+
|
| 30 |
+
_collect_examples()
|
| 31 |
+
|
| 32 |
+
def all_examples():
    """Return the module-level ``_EXAMPLE_CASES`` registry itself (not a copy)."""
    registry = _EXAMPLE_CASES
    return registry
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Fail the import with a RuntimeError if any case name was registered more
# than once, listing the conflicting models for each such name.
if _EXAMPLE_CONFLICT_CASES:

    def get_name(case):
        """Return the class name for module instances, else the model's own name."""
        model = case.model
        target = type(model) if isinstance(model, torch.nn.Module) else model
        return target.__name__

    msg = "Error on conflict export case name.\n" + "".join(
        f"Case name {case_name} is associated with multiple cases:\n "
        f"[{','.join(get_name(case) for case in cases)}]\n"
        for case_name, cases in _EXAMPLE_CONFLICT_CASES.items()
    )

    raise RuntimeError(msg)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def filter_examples_by_support_level(support_level: SupportLevel):
    """
    Return the registered examples whose ``support_level`` equals the given one.

    The result is a new dict keyed like the mapping from ``all_examples()``.
    """
    selected = {}
    for name, case in all_examples().items():
        if case.support_level == support_level:
            selected[name] = case
    return selected
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def get_rewrite_cases(case):
    """
    Return the rewrite cases registered under ``case.name``.

    A fresh empty list is returned when nothing is registered for that name.
    """
    no_rewrites = []
    return _EXAMPLE_REWRITE_CASES.get(case.name, no_rewrites)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/assume_constant_result.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
import torch._dynamo as torchdynamo
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class AssumeConstantResult(torch.nn.Module):
    """
    Applying the `assume_constant_result` decorator to burn non-traceable code in as a constant.
    """

    @torchdynamo.assume_constant_result
    def get_item(self, y):
        # Cast to int and pull the value out as a plain Python number.
        return y.int().item()

    def forward(self, x, y):
        # Slice the leading dimension of x up to the value extracted from y.
        stop = self.get_item(y)
        return x[:stop]


# Example inputs, tags and model instance for this example module.
example_args = (torch.randn(3, 2), torch.tensor(4))
tags = {"torch.escape-hatch"}
model = AssumeConstantResult()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/autograd_function.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class MyAutogradFunction(torch.autograd.Function):
|
| 5 |
+
@staticmethod
|
| 6 |
+
# pyrefly: ignore [bad-override]
|
| 7 |
+
def forward(ctx, x):
|
| 8 |
+
return x.clone()
|
| 9 |
+
|
| 10 |
+
@staticmethod
|
| 11 |
+
# pyrefly: ignore [bad-override]
|
| 12 |
+
def backward(ctx, grad_output):
|
| 13 |
+
return grad_output + 1
|
| 14 |
+
|
| 15 |
+
class AutogradFunction(torch.nn.Module):
|
| 16 |
+
"""
|
| 17 |
+
TorchDynamo does not keep track of backward() on autograd functions. We recommend to
|
| 18 |
+
use `allow_in_graph` to mitigate this problem.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
def forward(self, x):
|
| 22 |
+
return MyAutogradFunction.apply(x)
|
| 23 |
+
|
| 24 |
+
example_args = (torch.randn(3, 2),)
|
| 25 |
+
model = AutogradFunction()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/class_method.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class ClassMethod(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
Class methods are inlined during tracing.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
@classmethod
|
| 10 |
+
def method(cls, x):
|
| 11 |
+
return x + 1
|
| 12 |
+
|
| 13 |
+
def __init__(self) -> None:
|
| 14 |
+
super().__init__()
|
| 15 |
+
self.linear = torch.nn.Linear(4, 2)
|
| 16 |
+
|
| 17 |
+
def forward(self, x):
|
| 18 |
+
x = self.linear(x)
|
| 19 |
+
return self.method(x) * self.__class__.method(x) * type(self).method(x)
|
| 20 |
+
|
| 21 |
+
example_args = (torch.randn(3, 4),)
|
| 22 |
+
model = ClassMethod()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_class_method.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from functorch.experimental.control_flow import cond
|
| 5 |
+
|
| 6 |
+
class MySubModule(torch.nn.Module):
|
| 7 |
+
def foo(self, x):
|
| 8 |
+
return x.cos()
|
| 9 |
+
|
| 10 |
+
def forward(self, x):
|
| 11 |
+
return self.foo(x)
|
| 12 |
+
|
| 13 |
+
class CondBranchClassMethod(torch.nn.Module):
|
| 14 |
+
"""
|
| 15 |
+
The branch functions (`true_fn` and `false_fn`) passed to cond() must follow these rules:
|
| 16 |
+
- both branches must take the same args, which must also match the branch args passed to cond.
|
| 17 |
+
- both branches must return a single tensor
|
| 18 |
+
- returned tensor must have the same tensor metadata, e.g. shape and dtype
|
| 19 |
+
- branch function can be free function, nested function, lambda, class methods
|
| 20 |
+
- branch function can not have closure variables
|
| 21 |
+
- no inplace mutations on inputs or global variables
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
This example demonstrates using a class method in cond().
|
| 25 |
+
|
| 26 |
+
NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
def __init__(self) -> None:
|
| 30 |
+
super().__init__()
|
| 31 |
+
self.subm = MySubModule()
|
| 32 |
+
|
| 33 |
+
def bar(self, x):
|
| 34 |
+
return x.sin()
|
| 35 |
+
|
| 36 |
+
def forward(self, x):
|
| 37 |
+
return cond(x.shape[0] <= 2, self.subm.forward, self.bar, [x])
|
| 38 |
+
|
| 39 |
+
example_args = (torch.randn(3),)
|
| 40 |
+
tags = {
|
| 41 |
+
"torch.cond",
|
| 42 |
+
"torch.dynamic-shape",
|
| 43 |
+
}
|
| 44 |
+
model = CondBranchClassMethod()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_nested_function.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from functorch.experimental.control_flow import cond
|
| 5 |
+
|
| 6 |
+
class CondBranchNestedFunction(torch.nn.Module):
|
| 7 |
+
"""
|
| 8 |
+
The branch functions (`true_fn` and `false_fn`) passed to cond() must follow these rules:
|
| 9 |
+
- both branches must take the same args, which must also match the branch args passed to cond.
|
| 10 |
+
- both branches must return a single tensor
|
| 11 |
+
- returned tensor must have the same tensor metadata, e.g. shape and dtype
|
| 12 |
+
- branch function can be free function, nested function, lambda, class methods
|
| 13 |
+
- branch function can not have closure variables
|
| 14 |
+
- no inplace mutations on inputs or global variables
|
| 15 |
+
|
| 16 |
+
This example demonstrates using a nested function in cond().
|
| 17 |
+
|
| 18 |
+
NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
def forward(self, x):
|
| 22 |
+
def true_fn(x):
|
| 23 |
+
def inner_true_fn(y):
|
| 24 |
+
return x + y
|
| 25 |
+
|
| 26 |
+
return inner_true_fn(x)
|
| 27 |
+
|
| 28 |
+
def false_fn(x):
|
| 29 |
+
def inner_false_fn(y):
|
| 30 |
+
return x - y
|
| 31 |
+
|
| 32 |
+
return inner_false_fn(x)
|
| 33 |
+
|
| 34 |
+
return cond(x.shape[0] < 10, true_fn, false_fn, [x])
|
| 35 |
+
|
| 36 |
+
example_args = (torch.randn(3),)
|
| 37 |
+
tags = {
|
| 38 |
+
"torch.cond",
|
| 39 |
+
"torch.dynamic-shape",
|
| 40 |
+
}
|
| 41 |
+
model = CondBranchNestedFunction()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_nonlocal_variables.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from functorch.experimental.control_flow import cond
|
| 5 |
+
|
| 6 |
+
class CondBranchNonlocalVariables(torch.nn.Module):
|
| 7 |
+
"""
|
| 8 |
+
The branch functions (`true_fn` and `false_fn`) passed to cond() must follow these rules:
|
| 9 |
+
- both branches must take the same args, which must also match the branch args passed to cond.
|
| 10 |
+
- both branches must return a single tensor
|
| 11 |
+
- returned tensor must have the same tensor metadata, e.g. shape and dtype
|
| 12 |
+
- branch function can be free function, nested function, lambda, class methods
|
| 13 |
+
- branch function can not have closure variables
|
| 14 |
+
- no inplace mutations on inputs or global variables
|
| 15 |
+
|
| 16 |
+
This example demonstrates how to rewrite code to avoid capturing closure variables in branch functions.
|
| 17 |
+
|
| 18 |
+
The code below will not work because capturing closure variables is not supported.
|
| 19 |
+
```
|
| 20 |
+
my_tensor_var = x + 100
|
| 21 |
+
my_primitive_var = 3.14
|
| 22 |
+
|
| 23 |
+
def true_fn(y):
|
| 24 |
+
nonlocal my_tensor_var, my_primitive_var
|
| 25 |
+
return y + my_tensor_var + my_primitive_var
|
| 26 |
+
|
| 27 |
+
def false_fn(y):
|
| 28 |
+
nonlocal my_tensor_var, my_primitive_var
|
| 29 |
+
return y - my_tensor_var - my_primitive_var
|
| 30 |
+
|
| 31 |
+
return cond(x.shape[0] > 5, true_fn, false_fn, [x])
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
def forward(self, x):
|
| 38 |
+
my_tensor_var = x + 100
|
| 39 |
+
my_primitive_var = 3.14
|
| 40 |
+
|
| 41 |
+
def true_fn(x, y, z):
|
| 42 |
+
return x + y + z
|
| 43 |
+
|
| 44 |
+
def false_fn(x, y, z):
|
| 45 |
+
return x - y - z
|
| 46 |
+
|
| 47 |
+
return cond(
|
| 48 |
+
x.shape[0] > 5,
|
| 49 |
+
true_fn,
|
| 50 |
+
false_fn,
|
| 51 |
+
[x, my_tensor_var, torch.tensor(my_primitive_var)],
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
example_args = (torch.randn(6),)
|
| 55 |
+
tags = {
|
| 56 |
+
"torch.cond",
|
| 57 |
+
"torch.dynamic-shape",
|
| 58 |
+
}
|
| 59 |
+
model = CondBranchNonlocalVariables()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_closed_over_variable.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from functorch.experimental.control_flow import cond
|
| 5 |
+
|
| 6 |
+
class CondClosedOverVariable(torch.nn.Module):
|
| 7 |
+
"""
|
| 8 |
+
torch.cond() supports branches closed over arbitrary variables.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
def forward(self, pred, x):
|
| 12 |
+
def true_fn(val):
|
| 13 |
+
return x * 2
|
| 14 |
+
|
| 15 |
+
def false_fn(val):
|
| 16 |
+
return x - 2
|
| 17 |
+
|
| 18 |
+
return cond(pred, true_fn, false_fn, [x + 1])
|
| 19 |
+
|
| 20 |
+
example_args = (torch.tensor(True), torch.randn(3, 2))
|
| 21 |
+
tags = {"torch.cond", "python.closure"}
|
| 22 |
+
model = CondClosedOverVariable()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_operands.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from torch.export import Dim
|
| 5 |
+
|
| 6 |
+
x = torch.randn(3, 2)
|
| 7 |
+
y = torch.randn(2)
|
| 8 |
+
dim0_x = Dim("dim0_x")
|
| 9 |
+
|
| 10 |
+
class CondOperands(torch.nn.Module):
|
| 11 |
+
"""
|
| 12 |
+
The operands passed to cond() must be:
|
| 13 |
+
- a list of tensors
|
| 14 |
+
- match arguments of `true_fn` and `false_fn`
|
| 15 |
+
|
| 16 |
+
NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
def forward(self, x, y):
|
| 20 |
+
def true_fn(x, y):
|
| 21 |
+
return x + y
|
| 22 |
+
|
| 23 |
+
def false_fn(x, y):
|
| 24 |
+
return x - y
|
| 25 |
+
|
| 26 |
+
return torch.cond(x.shape[0] > 2, true_fn, false_fn, [x, y])
|
| 27 |
+
|
| 28 |
+
example_args = (x, y)
|
| 29 |
+
tags = {
|
| 30 |
+
"torch.cond",
|
| 31 |
+
"torch.dynamic-shape",
|
| 32 |
+
}
|
| 33 |
+
extra_inputs = (torch.randn(2, 2), torch.randn(2))
|
| 34 |
+
dynamic_shapes = {"x": {0: dim0_x}, "y": None}
|
| 35 |
+
model = CondOperands()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_predicate.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from functorch.experimental.control_flow import cond
|
| 5 |
+
|
| 6 |
+
class CondPredicate(torch.nn.Module):
|
| 7 |
+
"""
|
| 8 |
+
The conditional statement (aka predicate) passed to cond() must be one of the following:
|
| 9 |
+
- torch.Tensor with a single element
|
| 10 |
+
- boolean expression
|
| 11 |
+
|
| 12 |
+
NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
def forward(self, x):
|
| 16 |
+
pred = x.dim() > 2 and x.shape[2] > 10
|
| 17 |
+
|
| 18 |
+
return cond(pred, lambda x: x.cos(), lambda y: y.sin(), [x])
|
| 19 |
+
|
| 20 |
+
example_args = (torch.randn(6, 4, 3),)
|
| 21 |
+
tags = {
|
| 22 |
+
"torch.cond",
|
| 23 |
+
"torch.dynamic-shape",
|
| 24 |
+
}
|
| 25 |
+
model = CondPredicate()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/constrain_as_size_example.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class ConstrainAsSizeExample(torch.nn.Module):
|
| 6 |
+
"""
|
| 7 |
+
If the value is not known at tracing time, you can provide a hint so that we
|
| 8 |
+
can trace further. Please look at torch._check APIs.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
def forward(self, x):
|
| 12 |
+
a = x.item()
|
| 13 |
+
torch._check(a >= 0)
|
| 14 |
+
torch._check(a <= 5)
|
| 15 |
+
return torch.zeros((a, 5))
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
example_args = (torch.tensor(4),)
|
| 19 |
+
tags = {
|
| 20 |
+
"torch.dynamic-value",
|
| 21 |
+
"torch.escape-hatch",
|
| 22 |
+
}
|
| 23 |
+
model = ConstrainAsSizeExample()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/constrain_as_value_example.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class ConstrainAsValueExample(torch.nn.Module):
|
| 6 |
+
"""
|
| 7 |
+
If the value is not known at tracing time, you can provide a hint so that we
|
| 8 |
+
can trace further. Please look at torch._check API.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
def forward(self, x, y):
|
| 12 |
+
a = x.item()
|
| 13 |
+
torch._check(a >= 0)
|
| 14 |
+
torch._check(a <= 5)
|
| 15 |
+
|
| 16 |
+
if a < 6:
|
| 17 |
+
return y.sin()
|
| 18 |
+
return y.cos()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
example_args = (torch.tensor(4), torch.randn(5, 5))
|
| 22 |
+
tags = {
|
| 23 |
+
"torch.dynamic-value",
|
| 24 |
+
"torch.escape-hatch",
|
| 25 |
+
}
|
| 26 |
+
model = ConstrainAsValueExample()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/decorator.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import functools
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
def test_decorator(func):
|
| 7 |
+
@functools.wraps(func)
|
| 8 |
+
def wrapper(*args, **kwargs):
|
| 9 |
+
return func(*args, **kwargs) + 1
|
| 10 |
+
|
| 11 |
+
return wrapper
|
| 12 |
+
|
| 13 |
+
class Decorator(torch.nn.Module):
|
| 14 |
+
"""
|
| 15 |
+
Decorator calls are inlined into the exported function during tracing.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
@test_decorator
|
| 19 |
+
def forward(self, x, y):
|
| 20 |
+
return x + y
|
| 21 |
+
|
| 22 |
+
example_args = (torch.randn(3, 2), torch.randn(3, 2))
|
| 23 |
+
model = Decorator()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dictionary.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class Dictionary(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
Dictionary structures are inlined and flattened during tracing.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
def forward(self, x, y):
|
| 10 |
+
elements = {}
|
| 11 |
+
elements["x2"] = x * x
|
| 12 |
+
y = y * elements["x2"]
|
| 13 |
+
return {"y": y}
|
| 14 |
+
|
| 15 |
+
example_args = (torch.randn(3, 2), torch.tensor(4))
|
| 16 |
+
tags = {"python.data-structure"}
|
| 17 |
+
model = Dictionary()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_assert.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class DynamicShapeAssert(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
Basic usage of Python assertions.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
def forward(self, x):
|
| 10 |
+
# assertion with error message
|
| 11 |
+
assert x.shape[0] > 2, f"{x.shape[0]} is greater than 2"
|
| 12 |
+
# assertion without error message
|
| 13 |
+
assert x.shape[0] > 1
|
| 14 |
+
return x
|
| 15 |
+
|
| 16 |
+
example_args = (torch.randn(3, 2),)
|
| 17 |
+
tags = {"python.assert"}
|
| 18 |
+
model = DynamicShapeAssert()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_constructor.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class DynamicShapeConstructor(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
Tensor constructors should be captured with dynamic shape inputs rather
|
| 7 |
+
than being baked in with static shape.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
def forward(self, x):
|
| 11 |
+
return torch.zeros(x.shape[0] * 2)
|
| 12 |
+
|
| 13 |
+
example_args = (torch.randn(3, 2),)
|
| 14 |
+
tags = {"torch.dynamic-shape"}
|
| 15 |
+
model = DynamicShapeConstructor()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_if_guard.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class DynamicShapeIfGuard(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
`if` statement with backed dynamic shape predicate will be specialized into
|
| 7 |
+
one particular branch and generate a guard. However, export will fail if the
|
| 8 |
+
dimension is marked as dynamic shape from a higher-level API.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
def forward(self, x):
|
| 12 |
+
if x.shape[0] == 3:
|
| 13 |
+
return x.cos()
|
| 14 |
+
|
| 15 |
+
return x.sin()
|
| 16 |
+
|
| 17 |
+
example_args = (torch.randn(3, 2, 2),)
|
| 18 |
+
tags = {"torch.dynamic-shape", "python.control-flow"}
|
| 19 |
+
model = DynamicShapeIfGuard()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_map.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from functorch.experimental.control_flow import map
|
| 5 |
+
|
| 6 |
+
class DynamicShapeMap(torch.nn.Module):
|
| 7 |
+
"""
|
| 8 |
+
functorch map() maps a function over the first tensor dimension.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
def forward(self, xs, y):
|
| 12 |
+
def body(x, y):
|
| 13 |
+
return x + y
|
| 14 |
+
|
| 15 |
+
return map(body, xs, y)
|
| 16 |
+
|
| 17 |
+
example_args = (torch.randn(3, 2), torch.randn(2))
|
| 18 |
+
tags = {"torch.dynamic-shape", "torch.map"}
|
| 19 |
+
model = DynamicShapeMap()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_round.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from torch._export.db.case import SupportLevel
|
| 5 |
+
from torch.export import Dim
|
| 6 |
+
|
| 7 |
+
class DynamicShapeRound(torch.nn.Module):
|
| 8 |
+
"""
|
| 9 |
+
Calling round on dynamic shapes is not supported.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
def forward(self, x):
|
| 13 |
+
return x[: round(x.shape[0] / 2)]
|
| 14 |
+
|
| 15 |
+
x = torch.randn(3, 2)
|
| 16 |
+
dim0_x = Dim("dim0_x")
|
| 17 |
+
example_args = (x,)
|
| 18 |
+
tags = {"torch.dynamic-shape", "python.builtin"}
|
| 19 |
+
support_level = SupportLevel.NOT_SUPPORTED_YET
|
| 20 |
+
dynamic_shapes = {"x": {0: dim0_x}}
|
| 21 |
+
model = DynamicShapeRound()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_slicing.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class DynamicShapeSlicing(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
Slices with dynamic shape arguments should be captured into the graph
|
| 7 |
+
rather than being baked in.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
def forward(self, x):
|
| 11 |
+
return x[: x.shape[0] - 2, x.shape[1] - 1 :: 2]
|
| 12 |
+
|
| 13 |
+
example_args = (torch.randn(3, 2),)
|
| 14 |
+
tags = {"torch.dynamic-shape"}
|
| 15 |
+
model = DynamicShapeSlicing()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/fn_with_kwargs.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class FnWithKwargs(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
Keyword arguments are not supported at the moment.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
def forward(self, pos0, tuple0, *myargs, mykw0, **mykwargs):
|
| 10 |
+
out = pos0
|
| 11 |
+
for arg in tuple0:
|
| 12 |
+
out = out * arg
|
| 13 |
+
for arg in myargs:
|
| 14 |
+
out = out * arg
|
| 15 |
+
out = out * mykw0
|
| 16 |
+
out = out * mykwargs["input0"] * mykwargs["input1"]
|
| 17 |
+
return out
|
| 18 |
+
|
| 19 |
+
example_args = (
|
| 20 |
+
torch.randn(4),
|
| 21 |
+
(torch.randn(4), torch.randn(4)),
|
| 22 |
+
*[torch.randn(4), torch.randn(4)]
|
| 23 |
+
)
|
| 24 |
+
example_kwargs = {
|
| 25 |
+
"mykw0": torch.randn(4),
|
| 26 |
+
"input0": torch.randn(4),
|
| 27 |
+
"input1": torch.randn(4),
|
| 28 |
+
}
|
| 29 |
+
tags = {"python.data-structure"}
|
| 30 |
+
model = FnWithKwargs()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/list_contains.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
class ListContains(torch.nn.Module):
|
| 5 |
+
"""
|
| 6 |
+
List containment relation can be checked on a dynamic shape or constants.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
def forward(self, x):
|
| 10 |
+
assert x.size(-1) in [6, 2]
|
| 11 |
+
assert x.size(0) not in [4, 5, 6]
|
| 12 |
+
assert "monkey" not in ["cow", "pig"]
|
| 13 |
+
return x + x
|
| 14 |
+
|
| 15 |
+
example_args = (torch.randn(3, 2),)
|
| 16 |
+
tags = {"torch.dynamic-shape", "python.data-structure", "python.assert"}
|
| 17 |
+
model = ListContains()
|