Add files using upload-large-folder tool
Browse files- py311/lib/python3.11/site-packages/pandas/tests/arrays/period/test_astype.py +67 -0
- py311/lib/python3.11/site-packages/pandas/tests/arrays/string_/test_string.py +893 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/json/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/json/conftest.py +9 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/json/test_compression.py +130 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/json/test_deprecated_kwargs.py +21 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +317 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/json/test_normalize.py +907 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/json/test_pandas.py +2188 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/json/test_ujson.py +1087 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/parser/test_concatenate_chunks.py +36 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/pytables/test_categorical.py +214 -0
- py311/lib/python3.11/site-packages/pandas/tests/io/pytables/test_read.py +417 -0
- py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_constructors.py +51 -0
- py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_contains.py +73 -0
- py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_interval.py +87 -0
- py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_overlaps.py +67 -0
- py311/lib/python3.11/site-packages/pandas/tests/scalar/timestamp/test_formats.py +201 -0
- py311/lib/python3.11/site-packages/pandas/tests/scalar/timestamp/test_timezones.py +24 -0
py311/lib/python3.11/site-packages/pandas/tests/arrays/period/test_astype.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.core.dtypes.dtypes import PeriodDtype
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import pandas._testing as tm
|
| 8 |
+
from pandas.core.arrays import period_array
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
|
| 12 |
+
def test_astype_int(dtype):
|
| 13 |
+
# We choose to ignore the sign and size of integers for
|
| 14 |
+
# Period/Datetime/Timedelta astype
|
| 15 |
+
arr = period_array(["2000", "2001", None], freq="D")
|
| 16 |
+
|
| 17 |
+
if np.dtype(dtype) != np.int64:
|
| 18 |
+
with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
|
| 19 |
+
arr.astype(dtype)
|
| 20 |
+
return
|
| 21 |
+
|
| 22 |
+
result = arr.astype(dtype)
|
| 23 |
+
expected = arr._ndarray.view("i8")
|
| 24 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def test_astype_copies():
|
| 28 |
+
arr = period_array(["2000", "2001", None], freq="D")
|
| 29 |
+
result = arr.astype(np.int64, copy=False)
|
| 30 |
+
|
| 31 |
+
# Add the `.base`, since we now use `.asi8` which returns a view.
|
| 32 |
+
# We could maybe override it in PeriodArray to return ._ndarray directly.
|
| 33 |
+
assert result.base is arr._ndarray
|
| 34 |
+
|
| 35 |
+
result = arr.astype(np.int64, copy=True)
|
| 36 |
+
assert result is not arr._ndarray
|
| 37 |
+
tm.assert_numpy_array_equal(result, arr._ndarray.view("i8"))
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def test_astype_categorical():
|
| 41 |
+
arr = period_array(["2000", "2001", "2001", None], freq="D")
|
| 42 |
+
result = arr.astype("category")
|
| 43 |
+
categories = pd.PeriodIndex(["2000", "2001"], freq="D")
|
| 44 |
+
expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)
|
| 45 |
+
tm.assert_categorical_equal(result, expected)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def test_astype_period():
|
| 49 |
+
arr = period_array(["2000", "2001", None], freq="D")
|
| 50 |
+
result = arr.astype(PeriodDtype("M"))
|
| 51 |
+
expected = period_array(["2000", "2001", None], freq="M")
|
| 52 |
+
tm.assert_period_array_equal(result, expected)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
|
| 56 |
+
def test_astype_datetime(dtype):
|
| 57 |
+
arr = period_array(["2000", "2001", None], freq="D")
|
| 58 |
+
# slice off the [ns] so that the regex matches.
|
| 59 |
+
if dtype == "timedelta64[ns]":
|
| 60 |
+
with pytest.raises(TypeError, match=dtype[:-4]):
|
| 61 |
+
arr.astype(dtype)
|
| 62 |
+
|
| 63 |
+
else:
|
| 64 |
+
# GH#45038 allow period->dt64 because we allow dt64->period
|
| 65 |
+
result = arr.astype(dtype)
|
| 66 |
+
expected = pd.DatetimeIndex(["2000", "2001", pd.NaT], dtype=dtype)._data
|
| 67 |
+
tm.assert_datetime_array_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/arrays/string_/test_string.py
ADDED
|
@@ -0,0 +1,893 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This module tests the functionality of StringArray and ArrowStringArray.
|
| 3 |
+
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
|
| 4 |
+
"""
|
| 5 |
+
import operator
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pytest
|
| 9 |
+
|
| 10 |
+
from pandas._config import using_string_dtype
|
| 11 |
+
|
| 12 |
+
from pandas.compat import HAS_PYARROW
|
| 13 |
+
from pandas.compat.pyarrow import (
|
| 14 |
+
pa_version_under12p0,
|
| 15 |
+
pa_version_under19p0,
|
| 16 |
+
)
|
| 17 |
+
import pandas.util._test_decorators as td
|
| 18 |
+
|
| 19 |
+
from pandas.core.dtypes.common import is_dtype_equal
|
| 20 |
+
|
| 21 |
+
import pandas as pd
|
| 22 |
+
import pandas._testing as tm
|
| 23 |
+
from pandas.core.arrays.string_ import StringArrayNumpySemantics
|
| 24 |
+
from pandas.core.arrays.string_arrow import (
|
| 25 |
+
ArrowStringArray,
|
| 26 |
+
ArrowStringArrayNumpySemantics,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@pytest.fixture
|
| 31 |
+
def dtype(string_dtype_arguments):
|
| 32 |
+
"""Fixture giving StringDtype from parametrized storage and na_value arguments"""
|
| 33 |
+
storage, na_value = string_dtype_arguments
|
| 34 |
+
return pd.StringDtype(storage=storage, na_value=na_value)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@pytest.fixture
|
| 38 |
+
def dtype2(string_dtype_arguments2):
|
| 39 |
+
storage, na_value = string_dtype_arguments2
|
| 40 |
+
return pd.StringDtype(storage=storage, na_value=na_value)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@pytest.fixture
|
| 44 |
+
def cls(dtype):
|
| 45 |
+
"""Fixture giving array type from parametrized 'dtype'"""
|
| 46 |
+
return dtype.construct_array_type()
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def string_dtype_highest_priority(dtype1, dtype2):
|
| 50 |
+
if HAS_PYARROW:
|
| 51 |
+
DTYPE_HIERARCHY = [
|
| 52 |
+
pd.StringDtype("python", na_value=np.nan),
|
| 53 |
+
pd.StringDtype("pyarrow", na_value=np.nan),
|
| 54 |
+
pd.StringDtype("python", na_value=pd.NA),
|
| 55 |
+
pd.StringDtype("pyarrow", na_value=pd.NA),
|
| 56 |
+
]
|
| 57 |
+
else:
|
| 58 |
+
DTYPE_HIERARCHY = [
|
| 59 |
+
pd.StringDtype("python", na_value=np.nan),
|
| 60 |
+
pd.StringDtype("python", na_value=pd.NA),
|
| 61 |
+
]
|
| 62 |
+
|
| 63 |
+
h1 = DTYPE_HIERARCHY.index(dtype1)
|
| 64 |
+
h2 = DTYPE_HIERARCHY.index(dtype2)
|
| 65 |
+
return DTYPE_HIERARCHY[max(h1, h2)]
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def test_dtype_constructor():
|
| 69 |
+
pytest.importorskip("pyarrow")
|
| 70 |
+
|
| 71 |
+
with tm.assert_produces_warning(FutureWarning):
|
| 72 |
+
dtype = pd.StringDtype("pyarrow_numpy")
|
| 73 |
+
assert dtype == pd.StringDtype("pyarrow", na_value=np.nan)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def test_dtype_equality():
|
| 77 |
+
pytest.importorskip("pyarrow")
|
| 78 |
+
|
| 79 |
+
dtype1 = pd.StringDtype("python")
|
| 80 |
+
dtype2 = pd.StringDtype("pyarrow")
|
| 81 |
+
dtype3 = pd.StringDtype("pyarrow", na_value=np.nan)
|
| 82 |
+
|
| 83 |
+
assert dtype1 == pd.StringDtype("python", na_value=pd.NA)
|
| 84 |
+
assert dtype1 != dtype2
|
| 85 |
+
assert dtype1 != dtype3
|
| 86 |
+
|
| 87 |
+
assert dtype2 == pd.StringDtype("pyarrow", na_value=pd.NA)
|
| 88 |
+
assert dtype2 != dtype1
|
| 89 |
+
assert dtype2 != dtype3
|
| 90 |
+
|
| 91 |
+
assert dtype3 == pd.StringDtype("pyarrow", na_value=np.nan)
|
| 92 |
+
assert dtype3 == pd.StringDtype("pyarrow", na_value=float("nan"))
|
| 93 |
+
assert dtype3 != dtype1
|
| 94 |
+
assert dtype3 != dtype2
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def test_repr(dtype):
|
| 98 |
+
df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype=dtype)})
|
| 99 |
+
if dtype.na_value is np.nan:
|
| 100 |
+
expected = " A\n0 a\n1 NaN\n2 b"
|
| 101 |
+
else:
|
| 102 |
+
expected = " A\n0 a\n1 <NA>\n2 b"
|
| 103 |
+
assert repr(df) == expected
|
| 104 |
+
|
| 105 |
+
if dtype.na_value is np.nan:
|
| 106 |
+
expected = "0 a\n1 NaN\n2 b\nName: A, dtype: str"
|
| 107 |
+
else:
|
| 108 |
+
expected = "0 a\n1 <NA>\n2 b\nName: A, dtype: string"
|
| 109 |
+
assert repr(df.A) == expected
|
| 110 |
+
|
| 111 |
+
if dtype.storage == "pyarrow" and dtype.na_value is pd.NA:
|
| 112 |
+
arr_name = "ArrowStringArray"
|
| 113 |
+
expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
|
| 114 |
+
elif dtype.storage == "pyarrow" and dtype.na_value is np.nan:
|
| 115 |
+
arr_name = "ArrowStringArrayNumpySemantics"
|
| 116 |
+
expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: str"
|
| 117 |
+
elif dtype.storage == "python" and dtype.na_value is np.nan:
|
| 118 |
+
arr_name = "StringArrayNumpySemantics"
|
| 119 |
+
expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: str"
|
| 120 |
+
else:
|
| 121 |
+
arr_name = "StringArray"
|
| 122 |
+
expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
|
| 123 |
+
assert repr(df.A.array) == expected
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def test_dtype_repr(dtype):
|
| 127 |
+
if dtype.storage == "pyarrow":
|
| 128 |
+
if dtype.na_value is pd.NA:
|
| 129 |
+
assert repr(dtype) == "string[pyarrow]"
|
| 130 |
+
else:
|
| 131 |
+
assert repr(dtype) == "<StringDtype(na_value=nan)>"
|
| 132 |
+
elif dtype.na_value is pd.NA:
|
| 133 |
+
assert repr(dtype) == "string[python]"
|
| 134 |
+
else:
|
| 135 |
+
assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>"
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def test_none_to_nan(cls, dtype):
|
| 139 |
+
a = cls._from_sequence(["a", None, "b"], dtype=dtype)
|
| 140 |
+
assert a[1] is not None
|
| 141 |
+
assert a[1] is a.dtype.na_value
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def test_setitem_validates(cls, dtype):
|
| 145 |
+
arr = cls._from_sequence(["a", "b"], dtype=dtype)
|
| 146 |
+
|
| 147 |
+
msg = "Invalid value '10' for dtype 'str"
|
| 148 |
+
with pytest.raises(TypeError, match=msg):
|
| 149 |
+
arr[0] = 10
|
| 150 |
+
|
| 151 |
+
msg = "Invalid value for dtype 'str"
|
| 152 |
+
with pytest.raises(TypeError, match=msg):
|
| 153 |
+
arr[:] = np.array([1, 2])
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def test_setitem_with_scalar_string(dtype):
|
| 157 |
+
# is_float_dtype considers some strings, like 'd', to be floats
|
| 158 |
+
# which can cause issues.
|
| 159 |
+
arr = pd.array(["a", "c"], dtype=dtype)
|
| 160 |
+
arr[0] = "d"
|
| 161 |
+
expected = pd.array(["d", "c"], dtype=dtype)
|
| 162 |
+
tm.assert_extension_array_equal(arr, expected)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def test_setitem_with_array_with_missing(dtype):
|
| 166 |
+
# ensure that when setting with an array of values, we don't mutate the
|
| 167 |
+
# array `value` in __setitem__(self, key, value)
|
| 168 |
+
arr = pd.array(["a", "b", "c"], dtype=dtype)
|
| 169 |
+
value = np.array(["A", None])
|
| 170 |
+
value_orig = value.copy()
|
| 171 |
+
arr[[0, 1]] = value
|
| 172 |
+
|
| 173 |
+
expected = pd.array(["A", pd.NA, "c"], dtype=dtype)
|
| 174 |
+
tm.assert_extension_array_equal(arr, expected)
|
| 175 |
+
tm.assert_numpy_array_equal(value, value_orig)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def test_astype_roundtrip(dtype):
|
| 179 |
+
ser = pd.Series(pd.date_range("2000", periods=12))
|
| 180 |
+
ser[0] = None
|
| 181 |
+
|
| 182 |
+
casted = ser.astype(dtype)
|
| 183 |
+
assert is_dtype_equal(casted.dtype, dtype)
|
| 184 |
+
|
| 185 |
+
result = casted.astype("datetime64[ns]")
|
| 186 |
+
tm.assert_series_equal(result, ser)
|
| 187 |
+
|
| 188 |
+
# GH#38509 same thing for timedelta64
|
| 189 |
+
ser2 = ser - ser.iloc[-1]
|
| 190 |
+
casted2 = ser2.astype(dtype)
|
| 191 |
+
assert is_dtype_equal(casted2.dtype, dtype)
|
| 192 |
+
|
| 193 |
+
result2 = casted2.astype(ser2.dtype)
|
| 194 |
+
tm.assert_series_equal(result2, ser2)
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def test_add(dtype):
|
| 198 |
+
a = pd.Series(["a", "b", "c", None, None], dtype=dtype)
|
| 199 |
+
b = pd.Series(["x", "y", None, "z", None], dtype=dtype)
|
| 200 |
+
|
| 201 |
+
result = a + b
|
| 202 |
+
expected = pd.Series(["ax", "by", None, None, None], dtype=dtype)
|
| 203 |
+
tm.assert_series_equal(result, expected)
|
| 204 |
+
|
| 205 |
+
result = a.add(b)
|
| 206 |
+
tm.assert_series_equal(result, expected)
|
| 207 |
+
|
| 208 |
+
result = a.radd(b)
|
| 209 |
+
expected = pd.Series(["xa", "yb", None, None, None], dtype=dtype)
|
| 210 |
+
tm.assert_series_equal(result, expected)
|
| 211 |
+
|
| 212 |
+
result = a.add(b, fill_value="-")
|
| 213 |
+
expected = pd.Series(["ax", "by", "c-", "-z", None], dtype=dtype)
|
| 214 |
+
tm.assert_series_equal(result, expected)
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def test_add_2d(dtype, request):
|
| 218 |
+
if dtype.storage == "pyarrow":
|
| 219 |
+
reason = "Failed: DID NOT RAISE <class 'ValueError'>"
|
| 220 |
+
mark = pytest.mark.xfail(raises=None, reason=reason)
|
| 221 |
+
request.applymarker(mark)
|
| 222 |
+
|
| 223 |
+
a = pd.array(["a", "b", "c"], dtype=dtype)
|
| 224 |
+
b = np.array([["a", "b", "c"]], dtype=object)
|
| 225 |
+
with pytest.raises(ValueError, match="3 != 1"):
|
| 226 |
+
a + b
|
| 227 |
+
|
| 228 |
+
s = pd.Series(a)
|
| 229 |
+
with pytest.raises(ValueError, match="3 != 1"):
|
| 230 |
+
s + b
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def test_add_sequence(dtype):
|
| 234 |
+
a = pd.array(["a", "b", None, None], dtype=dtype)
|
| 235 |
+
other = ["x", None, "y", None]
|
| 236 |
+
|
| 237 |
+
result = a + other
|
| 238 |
+
expected = pd.array(["ax", None, None, None], dtype=dtype)
|
| 239 |
+
tm.assert_extension_array_equal(result, expected)
|
| 240 |
+
|
| 241 |
+
result = other + a
|
| 242 |
+
expected = pd.array(["xa", None, None, None], dtype=dtype)
|
| 243 |
+
tm.assert_extension_array_equal(result, expected)
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def test_mul(dtype):
|
| 247 |
+
a = pd.array(["a", "b", None], dtype=dtype)
|
| 248 |
+
result = a * 2
|
| 249 |
+
expected = pd.array(["aa", "bb", None], dtype=dtype)
|
| 250 |
+
tm.assert_extension_array_equal(result, expected)
|
| 251 |
+
|
| 252 |
+
result = 2 * a
|
| 253 |
+
tm.assert_extension_array_equal(result, expected)
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
@pytest.mark.xfail(reason="GH-28527")
|
| 257 |
+
def test_add_strings(dtype):
|
| 258 |
+
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
|
| 259 |
+
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
|
| 260 |
+
assert arr.__add__(df) is NotImplemented
|
| 261 |
+
|
| 262 |
+
result = arr + df
|
| 263 |
+
expected = pd.DataFrame([["at", "by", "cv", "dw"]]).astype(dtype)
|
| 264 |
+
tm.assert_frame_equal(result, expected)
|
| 265 |
+
|
| 266 |
+
result = df + arr
|
| 267 |
+
expected = pd.DataFrame([["ta", "yb", "vc", "wd"]]).astype(dtype)
|
| 268 |
+
tm.assert_frame_equal(result, expected)
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
@pytest.mark.xfail(reason="GH-28527")
|
| 272 |
+
def test_add_frame(dtype):
|
| 273 |
+
arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
|
| 274 |
+
df = pd.DataFrame([["x", np.nan, "y", np.nan]])
|
| 275 |
+
|
| 276 |
+
assert arr.__add__(df) is NotImplemented
|
| 277 |
+
|
| 278 |
+
result = arr + df
|
| 279 |
+
expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype)
|
| 280 |
+
tm.assert_frame_equal(result, expected)
|
| 281 |
+
|
| 282 |
+
result = df + arr
|
| 283 |
+
expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype)
|
| 284 |
+
tm.assert_frame_equal(result, expected)
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def test_comparison_methods_scalar(comparison_op, dtype):
|
| 288 |
+
op_name = f"__{comparison_op.__name__}__"
|
| 289 |
+
a = pd.array(["a", None, "c"], dtype=dtype)
|
| 290 |
+
other = "a"
|
| 291 |
+
result = getattr(a, op_name)(other)
|
| 292 |
+
if dtype.na_value is np.nan:
|
| 293 |
+
expected = np.array([getattr(item, op_name)(other) for item in a])
|
| 294 |
+
if comparison_op == operator.ne:
|
| 295 |
+
expected[1] = True
|
| 296 |
+
else:
|
| 297 |
+
expected[1] = False
|
| 298 |
+
tm.assert_numpy_array_equal(result, expected.astype(np.bool_))
|
| 299 |
+
else:
|
| 300 |
+
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
|
| 301 |
+
expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
|
| 302 |
+
expected = pd.array(expected, dtype=expected_dtype)
|
| 303 |
+
tm.assert_extension_array_equal(result, expected)
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
|
| 307 |
+
op_name = f"__{comparison_op.__name__}__"
|
| 308 |
+
a = pd.array(["a", None, "c"], dtype=dtype)
|
| 309 |
+
result = getattr(a, op_name)(pd.NA)
|
| 310 |
+
|
| 311 |
+
if dtype.na_value is np.nan:
|
| 312 |
+
if operator.ne == comparison_op:
|
| 313 |
+
expected = np.array([True, True, True])
|
| 314 |
+
else:
|
| 315 |
+
expected = np.array([False, False, False])
|
| 316 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 317 |
+
else:
|
| 318 |
+
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
|
| 319 |
+
expected = pd.array([None, None, None], dtype=expected_dtype)
|
| 320 |
+
tm.assert_extension_array_equal(result, expected)
|
| 321 |
+
tm.assert_extension_array_equal(result, expected)
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
def test_comparison_methods_scalar_not_string(comparison_op, dtype):
|
| 325 |
+
op_name = f"__{comparison_op.__name__}__"
|
| 326 |
+
|
| 327 |
+
a = pd.array(["a", None, "c"], dtype=dtype)
|
| 328 |
+
other = 42
|
| 329 |
+
|
| 330 |
+
if op_name not in ["__eq__", "__ne__"]:
|
| 331 |
+
with pytest.raises(TypeError, match="Invalid comparison|not supported between"):
|
| 332 |
+
getattr(a, op_name)(other)
|
| 333 |
+
|
| 334 |
+
return
|
| 335 |
+
|
| 336 |
+
result = getattr(a, op_name)(other)
|
| 337 |
+
|
| 338 |
+
if dtype.na_value is np.nan:
|
| 339 |
+
expected_data = {
|
| 340 |
+
"__eq__": [False, False, False],
|
| 341 |
+
"__ne__": [True, True, True],
|
| 342 |
+
}[op_name]
|
| 343 |
+
expected = np.array(expected_data)
|
| 344 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 345 |
+
else:
|
| 346 |
+
expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[
|
| 347 |
+
op_name
|
| 348 |
+
]
|
| 349 |
+
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
|
| 350 |
+
expected = pd.array(expected_data, dtype=expected_dtype)
|
| 351 |
+
tm.assert_extension_array_equal(result, expected)
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def test_comparison_methods_array(comparison_op, dtype, dtype2):
|
| 355 |
+
op_name = f"__{comparison_op.__name__}__"
|
| 356 |
+
|
| 357 |
+
a = pd.array(["a", None, "c"], dtype=dtype)
|
| 358 |
+
other = pd.array([None, None, "c"], dtype=dtype2)
|
| 359 |
+
result = comparison_op(a, other)
|
| 360 |
+
|
| 361 |
+
# ensure operation is commutative
|
| 362 |
+
result2 = comparison_op(other, a)
|
| 363 |
+
tm.assert_equal(result, result2)
|
| 364 |
+
|
| 365 |
+
if dtype.na_value is np.nan and dtype2.na_value is np.nan:
|
| 366 |
+
if operator.ne == comparison_op:
|
| 367 |
+
expected = np.array([True, True, False])
|
| 368 |
+
else:
|
| 369 |
+
expected = np.array([False, False, False])
|
| 370 |
+
expected[-1] = getattr(other[-1], op_name)(a[-1])
|
| 371 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 372 |
+
|
| 373 |
+
else:
|
| 374 |
+
max_dtype = string_dtype_highest_priority(dtype, dtype2)
|
| 375 |
+
if max_dtype.storage == "python":
|
| 376 |
+
expected_dtype = "boolean"
|
| 377 |
+
else:
|
| 378 |
+
expected_dtype = "bool[pyarrow]"
|
| 379 |
+
|
| 380 |
+
expected = np.full(len(a), fill_value=None, dtype="object")
|
| 381 |
+
expected[-1] = getattr(other[-1], op_name)(a[-1])
|
| 382 |
+
expected = pd.array(expected, dtype=expected_dtype)
|
| 383 |
+
tm.assert_extension_array_equal(result, expected)
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
@td.skip_if_no("pyarrow")
|
| 387 |
+
def test_comparison_methods_array_arrow_extension(comparison_op, dtype2):
|
| 388 |
+
# Test pd.ArrowDtype(pa.string()) against other string arrays
|
| 389 |
+
import pyarrow as pa
|
| 390 |
+
|
| 391 |
+
op_name = f"__{comparison_op.__name__}__"
|
| 392 |
+
dtype = pd.ArrowDtype(pa.string())
|
| 393 |
+
a = pd.array(["a", None, "c"], dtype=dtype)
|
| 394 |
+
other = pd.array([None, None, "c"], dtype=dtype2)
|
| 395 |
+
result = comparison_op(a, other)
|
| 396 |
+
|
| 397 |
+
# ensure operation is commutative
|
| 398 |
+
result2 = comparison_op(other, a)
|
| 399 |
+
tm.assert_equal(result, result2)
|
| 400 |
+
|
| 401 |
+
expected = pd.array([None, None, True], dtype="bool[pyarrow]")
|
| 402 |
+
expected[-1] = getattr(other[-1], op_name)(a[-1])
|
| 403 |
+
tm.assert_extension_array_equal(result, expected)
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
def test_comparison_methods_list(comparison_op, dtype):
|
| 407 |
+
op_name = f"__{comparison_op.__name__}__"
|
| 408 |
+
|
| 409 |
+
a = pd.array(["a", None, "c"], dtype=dtype)
|
| 410 |
+
other = [None, None, "c"]
|
| 411 |
+
result = comparison_op(a, other)
|
| 412 |
+
|
| 413 |
+
# ensure operation is commutative
|
| 414 |
+
result2 = comparison_op(other, a)
|
| 415 |
+
tm.assert_equal(result, result2)
|
| 416 |
+
|
| 417 |
+
if dtype.na_value is np.nan:
|
| 418 |
+
if operator.ne == comparison_op:
|
| 419 |
+
expected = np.array([True, True, False])
|
| 420 |
+
else:
|
| 421 |
+
expected = np.array([False, False, False])
|
| 422 |
+
expected[-1] = getattr(other[-1], op_name)(a[-1])
|
| 423 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 424 |
+
|
| 425 |
+
else:
|
| 426 |
+
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
|
| 427 |
+
expected = np.full(len(a), fill_value=None, dtype="object")
|
| 428 |
+
expected[-1] = getattr(other[-1], op_name)(a[-1])
|
| 429 |
+
expected = pd.array(expected, dtype=expected_dtype)
|
| 430 |
+
tm.assert_extension_array_equal(result, expected)
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
def test_constructor_raises(cls):
|
| 434 |
+
if cls is pd.arrays.StringArray:
|
| 435 |
+
msg = "StringArray requires a sequence of strings or pandas.NA"
|
| 436 |
+
elif cls is StringArrayNumpySemantics:
|
| 437 |
+
msg = "StringArrayNumpySemantics requires a sequence of strings or NaN"
|
| 438 |
+
else:
|
| 439 |
+
msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArray"
|
| 440 |
+
|
| 441 |
+
with pytest.raises(ValueError, match=msg):
|
| 442 |
+
cls(np.array(["a", "b"], dtype="S1"))
|
| 443 |
+
|
| 444 |
+
with pytest.raises(ValueError, match=msg):
|
| 445 |
+
cls(np.array([]))
|
| 446 |
+
|
| 447 |
+
if cls is pd.arrays.StringArray or cls is StringArrayNumpySemantics:
|
| 448 |
+
# GH#45057 np.nan and None do NOT raise, as they are considered valid NAs
|
| 449 |
+
# for string dtype
|
| 450 |
+
cls(np.array(["a", np.nan], dtype=object))
|
| 451 |
+
cls(np.array(["a", None], dtype=object))
|
| 452 |
+
else:
|
| 453 |
+
with pytest.raises(ValueError, match=msg):
|
| 454 |
+
cls(np.array(["a", np.nan], dtype=object))
|
| 455 |
+
with pytest.raises(ValueError, match=msg):
|
| 456 |
+
cls(np.array(["a", None], dtype=object))
|
| 457 |
+
|
| 458 |
+
with pytest.raises(ValueError, match=msg):
|
| 459 |
+
cls(np.array(["a", pd.NaT], dtype=object))
|
| 460 |
+
|
| 461 |
+
with pytest.raises(ValueError, match=msg):
|
| 462 |
+
cls(np.array(["a", np.datetime64("NaT", "ns")], dtype=object))
|
| 463 |
+
|
| 464 |
+
with pytest.raises(ValueError, match=msg):
|
| 465 |
+
cls(np.array(["a", np.timedelta64("NaT", "ns")], dtype=object))
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
@pytest.mark.parametrize("na", [np.nan, np.float64("nan"), float("nan"), None, pd.NA])
|
| 469 |
+
def test_constructor_nan_like(na):
|
| 470 |
+
expected = pd.arrays.StringArray(np.array(["a", pd.NA]))
|
| 471 |
+
tm.assert_extension_array_equal(
|
| 472 |
+
pd.arrays.StringArray(np.array(["a", na], dtype="object")), expected
|
| 473 |
+
)
|
| 474 |
+
|
| 475 |
+
|
| 476 |
+
@pytest.mark.parametrize("copy", [True, False])
|
| 477 |
+
def test_from_sequence_no_mutate(copy, cls, dtype):
|
| 478 |
+
nan_arr = np.array(["a", np.nan], dtype=object)
|
| 479 |
+
expected_input = nan_arr.copy()
|
| 480 |
+
na_arr = np.array(["a", pd.NA], dtype=object)
|
| 481 |
+
|
| 482 |
+
result = cls._from_sequence(nan_arr, dtype=dtype, copy=copy)
|
| 483 |
+
|
| 484 |
+
if cls in (ArrowStringArray, ArrowStringArrayNumpySemantics):
|
| 485 |
+
import pyarrow as pa
|
| 486 |
+
|
| 487 |
+
expected = cls(pa.array(na_arr, type=pa.string(), from_pandas=True))
|
| 488 |
+
elif cls is StringArrayNumpySemantics:
|
| 489 |
+
expected = cls(nan_arr)
|
| 490 |
+
else:
|
| 491 |
+
expected = cls(na_arr)
|
| 492 |
+
|
| 493 |
+
tm.assert_extension_array_equal(result, expected)
|
| 494 |
+
tm.assert_numpy_array_equal(nan_arr, expected_input)
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
def test_astype_int(dtype):
|
| 498 |
+
arr = pd.array(["1", "2", "3"], dtype=dtype)
|
| 499 |
+
result = arr.astype("int64")
|
| 500 |
+
expected = np.array([1, 2, 3], dtype="int64")
|
| 501 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 502 |
+
|
| 503 |
+
arr = pd.array(["1", pd.NA, "3"], dtype=dtype)
|
| 504 |
+
if dtype.na_value is np.nan:
|
| 505 |
+
err = ValueError
|
| 506 |
+
msg = "cannot convert float NaN to integer"
|
| 507 |
+
else:
|
| 508 |
+
err = TypeError
|
| 509 |
+
msg = (
|
| 510 |
+
r"int\(\) argument must be a string, a bytes-like "
|
| 511 |
+
r"object or a( real)? number"
|
| 512 |
+
)
|
| 513 |
+
with pytest.raises(err, match=msg):
|
| 514 |
+
arr.astype("int64")
|
| 515 |
+
|
| 516 |
+
|
| 517 |
+
def test_astype_nullable_int(dtype):
    """Casting to ``Int64`` matches an ``Int64`` array with pd.NA in the same slot."""
    strings = pd.array(["1", pd.NA, "3"], dtype=dtype)

    converted = strings.astype("Int64")
    tm.assert_extension_array_equal(
        converted, pd.array([1, pd.NA, 3], dtype="Int64")
    )
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
def test_astype_float(dtype, any_float_dtype):
    """Casting a string Series to a float dtype matches a float Series built with NaN."""
    # Don't compare arrays (37974)
    strings = pd.Series(["1.1", pd.NA, "3.3"], dtype=dtype)
    converted = strings.astype(any_float_dtype)
    tm.assert_series_equal(
        converted, pd.Series([1.1, np.nan, 3.3], dtype=any_float_dtype)
    )
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce(skipna, dtype):
    """Summing a string Series without missing values yields the concatenation."""
    ser = pd.Series(["a", "b", "c"], dtype=dtype)
    total = ser.sum(skipna=skipna)
    assert total == "abc"
|
| 538 |
+
|
| 539 |
+
|
| 540 |
+
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_missing(skipna, dtype):
    """Sum with missing entries: concatenation when skipping, missing otherwise."""
    ser = pd.Series([None, "a", None, "b", "c", None], dtype=dtype)
    total = ser.sum(skipna=skipna)
    if not skipna:
        assert pd.isna(total)
    else:
        assert total == "abc"
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
@pytest.mark.parametrize("method", ["min", "max"])
@pytest.mark.parametrize("skipna", [True, False])
def test_min_max(method, skipna, dtype):
    """``min``/``max`` return the extreme string, or the NA sentinel when not skipping."""
    ser = pd.Series(["a", "b", "c", None], dtype=dtype)
    reducer = getattr(ser, method)
    result = reducer(skipna=skipna)
    if not skipna:
        # Identity check: the exact sentinel object of the dtype is expected.
        assert result is ser.dtype.na_value
        return
    if method == "min":
        expected = "a"
    else:
        expected = "c"
    assert result == expected
|
| 560 |
+
|
| 561 |
+
|
| 562 |
+
@pytest.mark.parametrize("method", ["min", "max"])
@pytest.mark.parametrize("box", [pd.Series, pd.array])
def test_min_max_numpy(method, box, dtype, request):
    """``np.min`` / ``np.max`` on string data containing ``None`` return "a" / "c".

    The combination of pyarrow storage and ``pd.array`` is marked as an
    expected ``TypeError`` failure.
    """
    if dtype.storage == "pyarrow" and box is pd.array:
        # The guard already pins ``box`` to ``pd.array``, so a second check on
        # ``box`` (and its alternative failure reason) could never be reached.
        reason = "'<=' not supported between instances of 'str' and 'NoneType'"
        mark = pytest.mark.xfail(raises=TypeError, reason=reason)
        request.applymarker(mark)

    arr = box(["a", "b", "c", None], dtype=dtype)
    result = getattr(np, method)(arr)
    expected = "a" if method == "min" else "c"
    assert result == expected
|
| 577 |
+
|
| 578 |
+
|
| 579 |
+
def test_fillna_args(dtype):
    """``fillna`` accepts ``str`` and ``np.str_`` fill values and rejects an int."""
    # GH 37987
    arr = pd.array(["a", pd.NA], dtype=dtype)

    for fill in ("b", np.str_("b")):
        filled = arr.fillna(value=fill)
        tm.assert_extension_array_equal(filled, pd.array(["a", "b"], dtype=dtype))

    with pytest.raises(TypeError, match="Invalid value '1' for dtype 'str"):
        arr.fillna(value=1)
|
| 595 |
+
|
| 596 |
+
|
| 597 |
+
def test_arrow_array(dtype):
    """``pa.array`` on a string array yields the arrow type expected for its storage."""
    # protocol added in 0.15.0
    pa = pytest.importorskip("pyarrow")
    import pyarrow.compute as pc

    data = pd.array(["a", "b", "c"], dtype=dtype)
    converted = pa.array(data)

    expected = pa.array(list(data), type=pa.large_string(), from_pandas=True)
    storage = dtype.storage
    if storage == "pyarrow" and pa_version_under12p0:
        expected = pa.chunked_array(expected)
    elif storage == "python":
        # python storage is compared against plain (non-large) strings
        expected = pc.cast(expected, pa.string())
    assert converted.equals(expected)
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
    """Round-trip a string column through a pyarrow table and back to pandas."""
    # roundtrip possible from arrow 1.0.0
    pa = pytest.importorskip("pyarrow")

    df = pd.DataFrame({"a": pd.array(["a", "b", None], dtype=dtype)})
    table = pa.table(df)
    arrow_type = "string" if dtype.storage == "python" else "large_string"
    assert table.field("a").type == arrow_type

    with pd.option_context("string_storage", string_storage):
        result = table.to_pandas()

    if dtype.na_value is not np.nan or using_infer_string:
        assert isinstance(result["a"].dtype, pd.StringDtype)
        target = pd.StringDtype(string_storage, na_value=dtype.na_value)
        expected = df.astype(target)
        if using_infer_string:
            expected.columns = expected.columns.astype(
                pd.StringDtype(string_storage, na_value=np.nan)
            )
        tm.assert_frame_equal(result, expected)
        # ensure the missing value is represented by NA and not np.nan or None
        assert result.loc[2, "a"] is result["a"].dtype.na_value
    else:
        assert result["a"].dtype == "object"
|
| 638 |
+
|
| 639 |
+
|
| 640 |
+
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_from_string(using_infer_string):
    """Convert a pyarrow string table that carries no pandas metadata."""
    # not roundtrip, but starting with pyarrow table without pandas metadata
    pa = pytest.importorskip("pyarrow")
    column = pa.array(["a", "b", None], type=pa.string())
    table = pa.table({"a": column})

    result = table.to_pandas()

    if not using_infer_string or pa_version_under19p0:
        expected_dtype = "object"
    else:
        expected_dtype = "str"
    expected = pd.DataFrame({"a": ["a", "b", None]}, dtype=expected_dtype)
    tm.assert_frame_equal(result, expected)
|
| 653 |
+
|
| 654 |
+
|
| 655 |
+
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
    """Convert a pyarrow table whose only column has zero chunks back to pandas."""
    # GH-41040
    pa = pytest.importorskip("pyarrow")

    df = pd.DataFrame({"a": pd.array([], dtype=dtype)})
    table = pa.table(df)
    arrow_type = "string" if dtype.storage == "python" else "large_string"
    assert table.field("a").type == arrow_type

    # Instantiate the same table with no chunks at all
    empty_column = pa.chunked_array([], type=pa.string())
    table = pa.table([empty_column], schema=table.schema)
    with pd.option_context("string_storage", string_storage):
        result = table.to_pandas()

    if dtype.na_value is not np.nan or using_string_dtype():
        assert isinstance(result["a"].dtype, pd.StringDtype)
        target = pd.StringDtype(string_storage, na_value=dtype.na_value)
        expected = df.astype(target)
        if using_infer_string:
            expected.columns = expected.columns.astype(
                pd.StringDtype(string_storage, na_value=np.nan)
            )
        tm.assert_frame_equal(result, expected)
    else:
        assert result["a"].dtype == "object"
|
| 682 |
+
|
| 683 |
+
|
| 684 |
+
def test_value_counts_na(dtype):
    """``value_counts`` with and without the missing value included."""
    # The dtype of the counts depends on the NA sentinel and the storage.
    if dtype.na_value is not np.nan:
        count_dtype = "int64[pyarrow]" if dtype.storage == "pyarrow" else "Int64"
    else:
        count_dtype = "int64"

    arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)

    with_na = arr.value_counts(dropna=False)
    expected = pd.Series(
        [2, 1, 1], index=arr[[0, 1, 3]], dtype=count_dtype, name="count"
    )
    tm.assert_series_equal(with_na, expected)

    without_na = arr.value_counts(dropna=True)
    expected = pd.Series([2, 1], index=arr[:2], dtype=count_dtype, name="count")
    tm.assert_series_equal(without_na, expected)
|
| 699 |
+
|
| 700 |
+
|
| 701 |
+
def test_value_counts_with_normalize(dtype):
    """``value_counts(normalize=True)`` returns proportions over the non-missing values."""
    if dtype.na_value is not np.nan:
        prop_dtype = "double[pyarrow]" if dtype.storage == "pyarrow" else "Float64"
    else:
        prop_dtype = np.float64

    ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype)
    result = ser.value_counts(normalize=True)

    counts = pd.Series([2, 1], index=ser[:2], dtype=prop_dtype, name="proportion")
    expected = counts / 3
    tm.assert_series_equal(result, expected)
|
| 712 |
+
|
| 713 |
+
|
| 714 |
+
@pytest.mark.parametrize(
    "values, expected",
    [
        (["a", "b", "c"], np.array([False, False, False])),
        (["a", "b", None], np.array([False, False, True])),
    ],
)
def test_use_inf_as_na(values, expected, dtype):
    """``isna`` on array, Series and DataFrame while ``mode.use_inf_as_na`` is enabled."""
    # https://github.com/pandas-dev/pandas/issues/33655
    arr = pd.array(values, dtype=dtype)
    msg = "use_inf_as_na option is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        with pd.option_context("mode.use_inf_as_na", True):
            tm.assert_numpy_array_equal(arr.isna(), expected)

            expected_ser = pd.Series(expected)
            tm.assert_series_equal(pd.Series(arr).isna(), expected_ser)

            expected_df = pd.DataFrame(expected_ser)
            tm.assert_frame_equal(pd.DataFrame(arr).isna(), expected_df)
|
| 737 |
+
|
| 738 |
+
|
| 739 |
+
def test_value_counts_sort_false(dtype):
    """``value_counts(sort=False)`` reports counts in order of first appearance."""
    if dtype.na_value is not np.nan:
        count_dtype = "int64[pyarrow]" if dtype.storage == "pyarrow" else "Int64"
    else:
        count_dtype = "int64"

    ser = pd.Series(["a", "b", "c", "b"], dtype=dtype)
    result = ser.value_counts(sort=False)
    expected = pd.Series([1, 2, 1], index=ser[:3], dtype=count_dtype, name="count")
    tm.assert_series_equal(result, expected)
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
def test_memory_usage(dtype):
    """Deep memory usage exceeds shallow usage, which is at least ``nbytes``."""
    # GH 33963
    storage = dtype.storage
    if storage == "pyarrow":
        pytest.skip(f"not applicable for {dtype.storage}")

    ser = pd.Series(["a", "b", "c"], dtype=dtype)

    shallow = ser.memory_usage()
    deep = ser.memory_usage(deep=True)
    assert 0 < ser.nbytes <= shallow < deep
|
| 761 |
+
|
| 762 |
+
|
| 763 |
+
@pytest.mark.parametrize("float_dtype", [np.float16, np.float32, np.float64])
def test_astype_from_float_dtype(float_dtype, dtype):
    """Casting ``0.1`` of any float width to string gives ``"0.1"``."""
    # https://github.com/pandas-dev/pandas/issues/36451
    floats = pd.Series([0.1], dtype=float_dtype)
    converted = floats.astype(dtype)
    tm.assert_series_equal(converted, pd.Series(["0.1"], dtype=dtype))
|
| 770 |
+
|
| 771 |
+
|
| 772 |
+
def test_to_numpy_returns_pdna_default(dtype):
    """``np.array`` on a string array keeps the dtype's own NA sentinel."""
    arr = pd.array(["a", pd.NA, "b"], dtype=dtype)
    as_numpy = np.array(arr)
    tm.assert_numpy_array_equal(
        as_numpy, np.array(["a", dtype.na_value, "b"], dtype=object)
    )
|
| 777 |
+
|
| 778 |
+
|
| 779 |
+
def test_to_numpy_na_value(dtype, nulls_fixture):
    """``to_numpy(na_value=...)`` substitutes the requested null for the missing slot."""
    arr = pd.array(["a", pd.NA, "b"], dtype=dtype)
    as_numpy = arr.to_numpy(na_value=nulls_fixture)
    tm.assert_numpy_array_equal(
        as_numpy, np.array(["a", nulls_fixture, "b"], dtype=object)
    )
|
| 785 |
+
|
| 786 |
+
|
| 787 |
+
def test_isin(dtype, fixed_now_ts):
    """``Series.isin`` against lists of strings, NA, nothing, and a timestamp."""
    s = pd.Series(["a", "b", None], dtype=dtype)

    # (values to look up, expected membership mask), checked in this order
    cases = [
        (["a", "c"], [True, False, False]),
        (["a", pd.NA], [True, False, True]),
        ([], [False, False, False]),
        (["a", fixed_now_ts], [True, False, False]),
        ([fixed_now_ts], [False, False, False]),
    ]
    for candidates, flags in cases:
        result = s.isin(candidates)
        tm.assert_series_equal(result, pd.Series(flags))
|
| 809 |
+
|
| 810 |
+
|
| 811 |
+
def test_isin_string_array(dtype, dtype2):
    """``Series.isin`` with the lookup values held in a string array of ``dtype2``."""
    s = pd.Series(["a", "b", None], dtype=dtype)

    cases = [
        (["a", "c"], [True, False, False]),
        (["a", None], [True, False, True]),
    ]
    for candidates, flags in cases:
        result = s.isin(pd.array(candidates, dtype=dtype2))
        tm.assert_series_equal(result, pd.Series(flags))
|
| 821 |
+
|
| 822 |
+
|
| 823 |
+
def test_isin_arrow_string_array(dtype):
    """``Series.isin`` with the lookup values held in an ``ArrowDtype`` string array."""
    pa = pytest.importorskip("pyarrow")
    s = pd.Series(["a", "b", None], dtype=dtype)

    cases = [
        (["a", "c"], [True, False, False]),
        (["a", None], [True, False, True]),
    ]
    for candidates, flags in cases:
        lookup = pd.array(candidates, dtype=pd.ArrowDtype(pa.string()))
        tm.assert_series_equal(s.isin(lookup), pd.Series(flags))
|
| 834 |
+
|
| 835 |
+
|
| 836 |
+
def test_setitem_scalar_with_mask_validation(dtype):
    """Masked assignment validates the scalar: ``None`` becomes NA, an int raises."""
    # https://github.com/pandas-dev/pandas/issues/47628
    # setting None with a boolean mask (through _putmask) should still result
    # in the dtype's NA value in the underlying array
    mask = np.array([False, True, False])

    ser = pd.Series(["a", "b", "c"], dtype=dtype)
    ser[mask] = None
    assert ser.array[1] is ser.dtype.na_value

    # for other non-string we should also raise an error
    ser = pd.Series(["a", "b", "c"], dtype=dtype)
    with pytest.raises(TypeError, match="Invalid value '1' for dtype 'str"):
        ser[mask] = 1
|
| 851 |
+
|
| 852 |
+
|
| 853 |
+
def test_from_numpy_str(dtype):
    """A numpy ``str_`` array converts to the same string array as the plain list."""
    vals = ["a", "b", "c"]
    numpy_strings = np.array(vals, dtype=np.str_)
    from_numpy = pd.array(numpy_strings, dtype=dtype)
    from_list = pd.array(vals, dtype=dtype)
    tm.assert_extension_array_equal(from_numpy, from_list)
|
| 859 |
+
|
| 860 |
+
|
| 861 |
+
def test_tolist(dtype):
    """``tolist`` returns the original Python strings."""
    vals = ["a", "b", "c"]
    as_list = pd.array(vals, dtype=dtype).tolist()
    tm.assert_equal(as_list, vals)
|
| 867 |
+
|
| 868 |
+
|
| 869 |
+
@pytest.mark.parametrize("box", [pd.Series, pd.array])
def test_numpy_array_ufunc(dtype, box):
    """Apply ``np.frompyfunc`` ufuncs to string data held in a Series or array."""
    arr = box(["a", "bb", "ccc"], dtype=dtype)

    # custom ufunc that works with string (object) input -> returning numeric
    # (the builtin ``len`` is passed directly; no wrapping lambda is needed)
    str_len_ufunc = np.frompyfunc(len, 1, 1)
    result = str_len_ufunc(arr)
    expected_cls = pd.Series if box is pd.Series else np.array
    # TODO we should infer int64 dtype here?
    expected = expected_cls([1, 2, 3], dtype=object)
    tm.assert_equal(result, expected)

    # custom ufunc returning strings
    str_multiply_ufunc = np.frompyfunc(lambda x: x * 2, 1, 1)
    result = str_multiply_ufunc(arr)
    expected = box(["aa", "bbbb", "cccccc"], dtype=dtype)
    if dtype.storage == "pyarrow":
        # TODO ArrowStringArray should also preserve the class / dtype
        if box is pd.array:
            expected = np.array(["aa", "bbbb", "cccccc"], dtype=object)
        else:
            # not specifying the dtype because the exact dtype is not yet preserved
            expected = pd.Series(["aa", "bbbb", "cccccc"])

    tm.assert_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/io/json/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/io/json/conftest.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
@pytest.fixture(params=["split", "records", "index", "columns", "values"])
def orient(request):
    """
    Parametrized fixture yielding each JSON orient except the table format.
    """
    current_orient = request.param
    return current_orient
|
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_compression.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from io import (
|
| 2 |
+
BytesIO,
|
| 3 |
+
StringIO,
|
| 4 |
+
)
|
| 5 |
+
|
| 6 |
+
import pytest
|
| 7 |
+
|
| 8 |
+
import pandas.util._test_decorators as td
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test_compression_roundtrip(compression):
    """Write a frame with compression, then read it back both ways."""
    rows = [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]]
    df = pd.DataFrame(rows, index=["A", "B"], columns=["X", "Y", "Z"])

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        roundtripped = pd.read_json(path, compression=compression)
        tm.assert_frame_equal(df, roundtripped)

        # explicitly ensure file was compressed.
        with tm.decompress_file(path, compression) as fh:
            text = fh.read().decode("utf8")
            data = StringIO(text)
        tm.assert_frame_equal(df, pd.read_json(data))
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def test_read_zipped_json(datapath):
    """The zipped fixture file parses to the same frame as the plain JSON file."""
    data_dir = ("io", "json", "data")

    plain_df = pd.read_json(datapath(*data_dir, "tsframe_v012.json"))
    zipped_df = pd.read_json(
        datapath(*data_dir, "tsframe_v012.json.zip"), compression="zip"
    )

    tm.assert_frame_equal(plain_df, zipped_df)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
@td.skip_if_not_us_locale
@pytest.mark.single_cpu
def test_with_s3_url(compression, s3_public_bucket, s3so):
    """Upload a compressed JSON file to the bucket and read it back via an s3 URL."""
    # Bucket created in tests/io/conftest.py
    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        with open(path, "rb") as f:
            s3_public_bucket.put_object(Key="test-1", Body=f)

    url = f"s3://{s3_public_bucket.name}/test-1"
    roundtripped_df = pd.read_json(
        url, compression=compression, storage_options=s3so
    )
    tm.assert_frame_equal(df, roundtripped_df)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def test_lines_with_compression(compression):
    """Line-delimited JSON survives a compressed write/read round trip."""
    with tm.ensure_clean() as path:
        source = StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}')
        df = pd.read_json(source)
        df.to_json(path, orient="records", lines=True, compression=compression)
        tm.assert_frame_equal(
            df, pd.read_json(path, lines=True, compression=compression)
        )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def test_chunksize_with_compression(compression):
    """Chunked reading of compressed line-delimited JSON reassembles the frame."""
    with tm.ensure_clean() as path:
        source = StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
        df = pd.read_json(source)
        df.to_json(path, orient="records", lines=True, compression=compression)

        reader = pd.read_json(path, lines=True, chunksize=1, compression=compression)
        with reader as res:
            roundtripped_df = pd.concat(res)
        tm.assert_frame_equal(df, roundtripped_df)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def test_write_unsupported_compression_type():
    """``to_json`` rejects an unknown compression name with a ValueError."""
    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
    with tm.ensure_clean() as path:
        with pytest.raises(
            ValueError, match="Unrecognized compression type: unsupported"
        ):
            df.to_json(path, compression="unsupported")
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def test_read_unsupported_compression_type():
    """``read_json`` rejects an unknown compression name with a ValueError."""
    with tm.ensure_clean() as path:
        with pytest.raises(
            ValueError, match="Unrecognized compression type: unsupported"
        ):
            pd.read_json(path, compression="unsupported")
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
@pytest.mark.parametrize(
    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_json_compression(
    compression_only, read_infer, to_infer, compression_to_extension, infer_string
):
    """Round trip with explicit or inferred compression on the write and read side."""
    with pd.option_context("future.infer_string", infer_string):
        # see gh-15008
        compression = compression_only

        # The extension is what lets "infer" pick the compression.
        extension = compression_to_extension[compression]
        filename = "test." + extension

        df = pd.DataFrame({"A": [1]})

        if to_infer:
            to_compression = "infer"
        else:
            to_compression = compression
        if read_infer:
            read_compression = "infer"
        else:
            read_compression = compression

        with tm.ensure_clean(filename) as path:
            df.to_json(path, compression=to_compression)
            result = pd.read_json(path, compression=read_compression)
            tm.assert_frame_equal(result, df)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def test_to_json_compression_mode(compression):
    """``to_json`` can write compressed output into a user-provided binary buffer."""
    # GH 39985 (read_json does not support user-provided binary files)
    frame = pd.DataFrame({"A": [1]})

    with BytesIO() as buffer:
        frame.to_json(buffer, compression=compression)
        # Reading back is left disabled, per the GH 39985 note above:
        # df = pd.read_json(buffer, compression=compression)
        # tm.assert_frame_equal(frame, df)
|
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_deprecated_kwargs.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for the deprecated keyword arguments for `read_json`.
|
| 3 |
+
"""
|
| 4 |
+
from io import StringIO
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import pandas._testing as tm
|
| 8 |
+
|
| 9 |
+
from pandas.io.json import read_json
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_good_kwargs():
    """Keyword ``orient`` round trips emit no warning for split, columns and index."""
    df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2])

    with tm.assert_produces_warning(None):
        for orient in ("split", "columns", "index"):
            payload = StringIO(df.to_json(orient=orient))
            tm.assert_frame_equal(df, read_json(payload, orient=orient))
|
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_json_table_schema_ext_dtype.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for ExtensionDtype Table Schema integration."""
|
| 2 |
+
|
| 3 |
+
from collections import OrderedDict
|
| 4 |
+
import datetime as dt
|
| 5 |
+
import decimal
|
| 6 |
+
from io import StringIO
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
from pandas import (
|
| 12 |
+
NA,
|
| 13 |
+
DataFrame,
|
| 14 |
+
Index,
|
| 15 |
+
array,
|
| 16 |
+
read_json,
|
| 17 |
+
)
|
| 18 |
+
import pandas._testing as tm
|
| 19 |
+
from pandas.core.arrays.integer import Int64Dtype
|
| 20 |
+
from pandas.core.arrays.string_ import StringDtype
|
| 21 |
+
from pandas.core.series import Series
|
| 22 |
+
from pandas.tests.extension.date import (
|
| 23 |
+
DateArray,
|
| 24 |
+
DateDtype,
|
| 25 |
+
)
|
| 26 |
+
from pandas.tests.extension.decimal.array import (
|
| 27 |
+
DecimalArray,
|
| 28 |
+
DecimalDtype,
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
from pandas.io.json._table_schema import (
|
| 32 |
+
as_json_table_type,
|
| 33 |
+
build_table_schema,
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class TestBuildSchema:
    """Schema generation for a frame made of extension-dtype columns."""

    def test_build_table_schema(self):
        """Each extension column is reported with its table type and ``extDtype``."""
        columns = {
            "A": DateArray([dt.date(2021, 10, 10)]),
            "B": DecimalArray([decimal.Decimal(10)]),
            "C": array(["pandas"], dtype="string"),
            "D": array([10], dtype="Int64"),
        }
        df = DataFrame(columns)

        unversioned = build_table_schema(df, version=False)
        expected_fields = [
            {"name": "index", "type": "integer"},
            {"name": "A", "type": "any", "extDtype": "DateDtype"},
            {"name": "B", "type": "number", "extDtype": "decimal"},
            {"name": "C", "type": "string", "extDtype": "string"},
            {"name": "D", "type": "integer", "extDtype": "Int64"},
        ]
        assert unversioned == {"fields": expected_fields, "primaryKey": ["index"]}

        # With the default arguments the schema also carries the version key.
        versioned = build_table_schema(df)
        assert "pandas_version" in versioned
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class TestTableSchemaType:
    """``as_json_table_type`` results for extension dtypes and their containers."""

    @pytest.mark.parametrize(
        "date_data",
        [
            DateArray([dt.date(2021, 10, 10)]),
            DateArray(dt.date(2021, 10, 10)),
            Series(DateArray(dt.date(2021, 10, 10))),
        ],
    )
    def test_as_json_table_type_ext_date_array_dtype(self, date_data):
        """The dtype of date-array data maps to ``"any"``."""
        json_type = as_json_table_type(date_data.dtype)
        assert json_type == "any"

    def test_as_json_table_type_ext_date_dtype(self):
        """A bare ``DateDtype`` maps to ``"any"``."""
        json_type = as_json_table_type(DateDtype())
        assert json_type == "any"

    @pytest.mark.parametrize(
        "decimal_data",
        [
            DecimalArray([decimal.Decimal(10)]),
            Series(DecimalArray([decimal.Decimal(10)])),
        ],
    )
    def test_as_json_table_type_ext_decimal_array_dtype(self, decimal_data):
        """The dtype of decimal-array data maps to ``"number"``."""
        json_type = as_json_table_type(decimal_data.dtype)
        assert json_type == "number"

    def test_as_json_table_type_ext_decimal_dtype(self):
        """A bare ``DecimalDtype`` maps to ``"number"``."""
        json_type = as_json_table_type(DecimalDtype())
        assert json_type == "number"

    @pytest.mark.parametrize(
        "string_data",
        [
            array(["pandas"], dtype="string"),
            Series(array(["pandas"], dtype="string")),
        ],
    )
    def test_as_json_table_type_ext_string_array_dtype(self, string_data):
        """The dtype of string-array data maps to ``"string"``."""
        json_type = as_json_table_type(string_data.dtype)
        assert json_type == "string"

    def test_as_json_table_type_ext_string_dtype(self):
        """A bare ``StringDtype`` maps to ``"string"``."""
        json_type = as_json_table_type(StringDtype())
        assert json_type == "string"

    @pytest.mark.parametrize(
        "integer_data",
        [
            array([10], dtype="Int64"),
            Series(array([10], dtype="Int64")),
        ],
    )
    def test_as_json_table_type_ext_integer_array_dtype(self, integer_data):
        """The dtype of nullable-integer data maps to ``"integer"``."""
        json_type = as_json_table_type(integer_data.dtype)
        assert json_type == "integer"

    def test_as_json_table_type_ext_integer_dtype(self):
        """A bare ``Int64Dtype`` maps to ``"integer"``."""
        json_type = as_json_table_type(Int64Dtype())
        assert json_type == "integer"
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class TestTableOrient:
    """Tests for ``orient="table"`` JSON with extension-dtype columns."""

    @staticmethod
    def _dump_table(obj):
        """Serialise *obj* with ``orient="table"`` and return the parsed JSON.

        The ``pandas_version`` schema entry is asserted to be present and is
        then removed, so callers compare only the remaining content.
        """
        parsed = json.loads(
            obj.to_json(orient="table", date_format="iso"),
            object_pairs_hook=OrderedDict,
        )
        assert "pandas_version" in parsed["schema"]
        parsed["schema"].pop("pandas_version")
        return parsed

    @pytest.fixture
    def da(self):
        """One-element ``DateArray`` holding 2021-10-10."""
        the_day = dt.date(2021, 10, 10)
        return DateArray([the_day])

    @pytest.fixture
    def dc(self):
        """One-element ``DecimalArray`` holding ``Decimal(10)``."""
        ten = decimal.Decimal(10)
        return DecimalArray([ten])

    @pytest.fixture
    def sa(self):
        """One-element "string" dtype array."""
        return array(["pandas"], dtype="string")

    @pytest.fixture
    def ia(self):
        """One-element "Int64" dtype array."""
        return array([10], dtype="Int64")

    @pytest.fixture
    def df(self, da, dc, sa, ia):
        """Frame with one column per extension array, labelled A through D."""
        columns = {"A": da, "B": dc, "C": sa, "D": ia}
        return DataFrame(columns)

    def test_build_date_series(self, da):
        """A ``DateArray`` Series is emitted as type "any" with an ISO string."""
        ser = Series(da, name="a")
        ser.index.name = "id"
        payload = self._dump_table(ser)

        want_schema = {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "a", "type": "any", "extDtype": "DateDtype"},
            ],
            "primaryKey": ["id"],
        }
        want_row = OrderedDict([("id", 0), ("a", "2021-10-10T00:00:00.000")])
        want = OrderedDict([("schema", want_schema), ("data", [want_row])])

        assert payload == want

    def test_build_decimal_series(self, dc):
        """A ``DecimalArray`` Series is emitted as type "number"."""
        ser = Series(dc, name="a")
        ser.index.name = "id"
        payload = self._dump_table(ser)

        want_schema = {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "a", "type": "number", "extDtype": "decimal"},
            ],
            "primaryKey": ["id"],
        }
        want_row = OrderedDict([("id", 0), ("a", 10.0)])
        want = OrderedDict([("schema", want_schema), ("data", [want_row])])

        assert payload == want

    def test_build_string_series(self, sa):
        """A "string" dtype Series is emitted as type "string"."""
        ser = Series(sa, name="a")
        ser.index.name = "id"
        payload = self._dump_table(ser)

        want_schema = {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "a", "type": "string", "extDtype": "string"},
            ],
            "primaryKey": ["id"],
        }
        want_row = OrderedDict([("id", 0), ("a", "pandas")])
        want = OrderedDict([("schema", want_schema), ("data", [want_row])])

        assert payload == want

    def test_build_int64_series(self, ia):
        """An "Int64" dtype Series is emitted as type "integer"."""
        ser = Series(ia, name="a")
        ser.index.name = "id"
        payload = self._dump_table(ser)

        want_schema = {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "a", "type": "integer", "extDtype": "Int64"},
            ],
            "primaryKey": ["id"],
        }
        want_row = OrderedDict([("id", 0), ("a", 10)])
        want = OrderedDict([("schema", want_schema), ("data", [want_row])])

        assert payload == want

    def test_to_json(self, df):
        """A frame mixing all four extension columns serialises field by field."""
        frame = df.copy()
        frame.index.name = "idx"
        payload = self._dump_table(frame)

        # Ordered mappings throughout, so key order takes part in the comparison.
        want_fields = [
            OrderedDict({"name": "idx", "type": "integer"}),
            OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}),
            OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}),
            OrderedDict({"name": "C", "type": "string", "extDtype": "string"}),
            OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}),
        ]
        want_schema = OrderedDict({"fields": want_fields, "primaryKey": ["idx"]})
        want_row = OrderedDict(
            [
                ("idx", 0),
                ("A", "2021-10-10T00:00:00.000"),
                ("B", 10.0),
                ("C", "pandas"),
                ("D", 10),
            ]
        )
        want = OrderedDict([("schema", want_schema), ("data", [want_row])])

        assert payload == want

    def test_json_ext_dtype_reading_roundtrip(self):
        """Nullable columns and index survive a write/read cycle (GH#40255)."""
        original = DataFrame(
            {
                "a": Series([2, NA], dtype="Int64"),
                "b": Series([1.5, NA], dtype="Float64"),
                "c": Series([True, NA], dtype="boolean"),
            },
            index=Index([1, NA], dtype="Int64"),
        )
        expected = original.copy()

        buffer = StringIO(original.to_json(orient="table", indent=4))
        tm.assert_frame_equal(read_json(buffer, orient="table"), expected)

    def test_json_ext_dtype_reading(self):
        """A hand-written table payload is read back as "Int64" (GH#40255)."""
        data_json = """{
"schema":{
"fields":[
{
"name":"a",
"type":"integer",
"extDtype":"Int64"
}
],
},
"data":[
{
"a":2
},
{
"a":null
}
]
}"""
        expected = DataFrame({"a": Series([2, NA], dtype="Int64")})
        tm.assert_frame_equal(
            read_json(StringIO(data_json), orient="table"), expected
        )
|
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_normalize.py
ADDED
|
@@ -0,0 +1,907 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas import (
|
| 7 |
+
DataFrame,
|
| 8 |
+
Index,
|
| 9 |
+
Series,
|
| 10 |
+
json_normalize,
|
| 11 |
+
)
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
|
| 14 |
+
from pandas.io.json._normalize import nested_to_record
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@pytest.fixture
def deep_nested():
    """Return two country records nested as country -> states -> cities."""

    def city(name, pop):
        return {"name": name, "pop": pop}

    def state(name, *cities):
        return {"name": name, "cities": list(cities)}

    usa = {
        "country": "USA",
        "states": [
            state(
                "California",
                city("San Francisco", 12345),
                city("Los Angeles", 12346),
            ),
            state("Ohio", city("Columbus", 1234), city("Cleveland", 1236)),
        ],
    }
    germany = {
        "country": "Germany",
        "states": [
            state("Bayern", city("Munich", 12347)),
            state(
                "Nordrhein-Westfalen",
                city("Duesseldorf", 1238),
                city("Koeln", 1239),
            ),
        ],
    }
    return [usa, germany]
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@pytest.fixture
def state_data():
    """Return two state records, each with counties, info and names."""

    def county(name, population):
        return {"name": name, "population": population}

    florida = {
        "counties": [
            county("Dade", 12345),
            county("Broward", 40000),
            county("Palm Beach", 60000),
        ],
        "info": {"governor": "Rick Scott"},
        "shortname": "FL",
        "state": "Florida",
    }
    ohio = {
        "counties": [county("Summit", 1234), county("Cuyahoga", 1337)],
        "info": {"governor": "John Kasich"},
        "shortname": "OH",
        "state": "Ohio",
    }
    return [florida, ohio]
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
@pytest.fixture
def author_missing_data():
    """Return one record whose "info" is ``None`` and one fully populated."""
    empty_record = {"info": None}
    full_record = {
        "info": {"created_at": "11/08/1993", "last_updated": "26/05/2012"},
        "author_name": {"first": "Jane", "last_name": "Doe"},
    }
    return [empty_record, full_record]
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
@pytest.fixture
def missing_metadata():
    """Return two address records; only the first has a "name" key."""

    def address(number, street, city, state, zip_code):
        return {
            "number": number,
            "street": street,
            "city": city,
            "state": state,
            "zip": zip_code,
        }

    def residences(city_name):
        return {"cities": [{"city_name": city_name}]}

    named = {
        "name": "Alice",
        "addresses": [address(9562, "Morris St.", "Massillon", "OH", 44646)],
        "previous_residences": residences("Foo York City"),
    }
    unnamed = {
        "addresses": [address(8449, "Spring St.", "Elizabethton", "TN", 37643)],
        "previous_residences": residences("Barmingham"),
    }
    return [named, unnamed]
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
@pytest.fixture
def max_level_test_input_data():
    """
    Single nested record used to test json_normalize with max_level param.
    """
    user_field = {"Id": "ID001", "Name": "Name001"}
    record = {
        "CreatedBy": {"Name": "User001"},
        "Lookup": {"TextField": "Some text", "UserField": user_field},
        "Image": {"a": "b"},
    }
    return [record]
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class TestJSONNormalize:
    """Tests for ``json_normalize``."""

    def test_simple_records(self):
        """Flat records give the same frame as the ``DataFrame`` constructor."""
        flat_rows = [
            {"a": 1, "b": 2, "c": 3},
            {"a": 4, "b": 5, "c": 6},
            {"a": 7, "b": 8, "c": 9},
            {"a": 10, "b": 11, "c": 12},
        ]

        tm.assert_frame_equal(json_normalize(flat_rows), DataFrame(flat_rows))

    def test_simple_normalize(self, state_data):
        """``record_path`` on one record, on every record, then with ``meta``."""
        first_state = state_data[0]
        tm.assert_frame_equal(
            json_normalize(first_state, "counties"),
            DataFrame(first_state["counties"]),
        )

        every_county = [
            county for rec in state_data for county in rec["counties"]
        ]
        expected = DataFrame(every_county)
        tm.assert_frame_equal(json_normalize(state_data, "counties"), expected)

        with_meta = json_normalize(state_data, "counties", meta="state")
        expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2])
        tm.assert_frame_equal(with_meta, expected)

    def test_fields_list_type_normalize(self):
        """A list-valued meta field is repeated as-is for every record row."""
        parse_metadata_fields_list_type = [
            {"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}}
        ]
        expected = DataFrame(
            {0: [1, 2, 3], "metadata.listdata": [[1, 2], [1, 2], [1, 2]]}
        )

        result = json_normalize(
            parse_metadata_fields_list_type,
            record_path=["values"],
            meta=[["metadata", "listdata"]],
        )

        tm.assert_frame_equal(result, expected)

    def test_empty_array(self):
        """An empty list normalises to an empty frame."""
        tm.assert_frame_equal(json_normalize([]), DataFrame())

    @pytest.mark.parametrize(
        "data, record_path, exception_type",
        [
            ([{"a": 0}, {"a": 1}], None, None),
            ({"a": [{"a": 0}, {"a": 1}]}, "a", None),
            ('{"a": [{"a": 0}, {"a": 1}]}', None, NotImplementedError),
            (None, None, NotImplementedError),
        ],
    )
    def test_accepted_input(self, data, record_path, exception_type):
        """Lists and dicts are accepted; a JSON string or ``None`` raises."""
        if exception_type is None:
            result = json_normalize(data, record_path=record_path)
            expected = DataFrame([0, 1], columns=["a"])
            tm.assert_frame_equal(result, expected)
            return

        with pytest.raises(exception_type, match=""):
            json_normalize(data, record_path=record_path)

    def test_simple_normalize_with_separator(self, deep_nested):
        """Column names are joined with the requested ``sep`` (GH 14883)."""
        cases = [
            ({}, ["A.A", "A.B"]),
            ({"sep": "_"}, ["A_A", "A_B"]),
            ({"sep": "\u03c3"}, ["A\u03c3A", "A\u03c3B"]),
        ]
        for kwargs, column_names in cases:
            result = json_normalize({"A": {"A": 1, "B": 2}}, **kwargs)
            expected = DataFrame([[1, 2]], columns=column_names)
            tm.assert_frame_equal(result.reindex_like(expected), expected)

        nested_result = json_normalize(
            deep_nested,
            ["states", "cities"],
            meta=["country", ["states", "name"]],
            sep="_",
        )
        wanted = Index(["name", "pop", "country", "states_name"]).sort_values()
        assert nested_result.columns.sort_values().equals(wanted)

    def test_normalize_with_multichar_separator(self):
        """A separator longer than one character is supported (GH #43831)."""
        data = {"a": [1, 2], "b": {"b_1": 2, "b_2": (3, 4)}}
        expected = DataFrame(
            [[[1, 2], 2, (3, 4)]], columns=["a", "b__b_1", "b__b_2"]
        )

        tm.assert_frame_equal(json_normalize(data, sep="__"), expected)

    def test_value_array_record_prefix(self):
        """``record_prefix`` is applied to the column of a scalar array (GH 21536)."""
        expected = DataFrame([[1], [2]], columns=["Prefix.0"])

        result = json_normalize({"A": [1, 2]}, "A", record_prefix="Prefix.")

        tm.assert_frame_equal(result, expected)

    def test_nested_object_record_path(self):
        """``record_path`` may descend through a nested dict (GH 22706)."""
        counties = [
            {"name": "Dade", "population": 12345},
            {"name": "Broward", "population": 40000},
            {"name": "Palm Beach", "population": 60000},
        ]
        data = {
            "state": "Florida",
            "info": {"governor": "Rick Scott", "counties": counties},
        }
        expected = DataFrame(
            [["Dade", 12345], ["Broward", 40000], ["Palm Beach", 60000]],
            columns=["name", "population"],
        )

        result = json_normalize(data, record_path=["info", "counties"])

        tm.assert_frame_equal(result, expected)

    def test_more_deeply_nested(self, deep_nested):
        """Two-level ``record_path`` with meta taken from both outer levels."""
        result = json_normalize(
            deep_nested, ["states", "cities"], meta=["country", ["states", "name"]]
        )

        city_names = [
            "San Francisco",
            "Los Angeles",
            "Columbus",
            "Cleveland",
            "Munich",
            "Duesseldorf",
            "Koeln",
        ]
        state_names = [
            "California",
            "California",
            "Ohio",
            "Ohio",
            "Bayern",
            "Nordrhein-Westfalen",
            "Nordrhein-Westfalen",
        ]
        ex_data = {
            "country": ["USA"] * 4 + ["Germany"] * 3,
            "states.name": state_names,
            "name": city_names,
            "pop": [12345, 12346, 1234, 1236, 12347, 1238, 1239],
        }

        # Column order is taken from the result; only the values are compared.
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected)

    def test_shallow_nested(self):
        """One-level ``record_path`` with flat and nested meta fields."""
        florida = {
            "state": "Florida",
            "shortname": "FL",
            "info": {"governor": "Rick Scott"},
            "counties": [
                {"name": "Dade", "population": 12345},
                {"name": "Broward", "population": 40000},
                {"name": "Palm Beach", "population": 60000},
            ],
        }
        ohio = {
            "state": "Ohio",
            "shortname": "OH",
            "info": {"governor": "John Kasich"},
            "counties": [
                {"name": "Summit", "population": 1234},
                {"name": "Cuyahoga", "population": 1337},
            ],
        }
        data = [florida, ohio]

        result = json_normalize(
            data, "counties", ["state", "shortname", ["info", "governor"]]
        )

        ex_data = {
            "name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"],
            "state": ["Florida"] * 3 + ["Ohio"] * 2,
            "shortname": ["FL", "FL", "FL", "OH", "OH"],
            "info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2,
            "population": [12345, 40000, 60000, 1234, 1337],
        }
        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected)

    def test_nested_meta_path_with_nested_record_path(self, state_data):
        """Nested meta path combined with ``errors="ignore"`` (GH 27220)."""
        expected = DataFrame(
            {
                "name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"],
                "population": [12345, 40000, 60000, 1234, 1337],
                "state": ["Florida"] * 3 + ["Ohio"] * 2,
                "shortname": ["FL"] * 3 + ["OH"] * 2,
                "info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2,
            }
        )

        result = json_normalize(
            data=state_data,
            record_path=["counties"],
            meta=["state", "shortname", ["info", "governor"]],
            errors="ignore",
        )

        tm.assert_frame_equal(result, expected)

    def test_meta_name_conflict(self):
        """Meta names that clash with record keys raise unless prefixed."""
        data = [
            {
                "foo": "hello",
                "bar": "there",
                "data": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        msg = r"Conflicting metadata name (foo|bar), need distinguishing prefix"
        with pytest.raises(ValueError, match=msg):
            json_normalize(data, "data", meta=["foo", "bar"])

        prefixed = json_normalize(
            data, "data", meta=["foo", "bar"], meta_prefix="meta"
        )

        for column in ["metafoo", "metabar", "foo", "bar"]:
            assert column in prefixed

    def test_meta_parameter_not_modified(self):
        """The list passed as ``meta`` is left unchanged (GH 18610)."""
        data = [
            {
                "foo": "hello",
                "bar": "there",
                "data": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        COLUMNS = ["foo", "bar"]
        prefixed = json_normalize(data, "data", meta=COLUMNS, meta_prefix="meta")

        assert COLUMNS == ["foo", "bar"]
        for column in ["metafoo", "metabar", "foo", "bar"]:
            assert column in prefixed

    def test_record_prefix(self, state_data):
        """``record_prefix`` renames record columns but not meta columns."""
        first_state = state_data[0]
        tm.assert_frame_equal(
            json_normalize(first_state, "counties"),
            DataFrame(first_state["counties"]),
        )

        result = json_normalize(
            state_data, "counties", meta="state", record_prefix="county_"
        )

        every_county = [
            county for rec in state_data for county in rec["counties"]
        ]
        expected = DataFrame(every_county).rename(
            columns=lambda label: "county_" + label
        )
        expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2])

        tm.assert_frame_equal(result, expected)

    def test_non_ascii_key(self):
        """Non-ASCII keys decoded from UTF-8 bytes are kept as column names."""
        raw_json = (
            b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
            b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
        )
        testjson = raw_json.decode("utf8")

        unicode_key = b"\xc3\x9cnic\xc3\xb8de".decode("utf8")
        expected = DataFrame(
            {
                unicode_key: [0, 1],
                "sub.A": [1, 3],
                "sub.B": [2, 4],
            }
        )

        result = json_normalize(json.loads(testjson))
        tm.assert_frame_equal(result, expected)

    def test_missing_field(self, author_missing_data):
        """A record whose nested field is ``None`` yields missing values (GH20030)."""
        result = json_normalize(author_missing_data)

        row_without_info = {
            "info": np.nan,
            "info.created_at": np.nan,
            "info.last_updated": np.nan,
            "author_name.first": np.nan,
            "author_name.last_name": np.nan,
        }
        row_with_info = {
            "info": None,
            "info.created_at": "11/08/1993",
            "info.last_updated": "26/05/2012",
            "author_name.first": "Jane",
            "author_name.last_name": "Doe",
        }
        expected = DataFrame([row_without_info, row_with_info])

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "max_level,expected",
        [
            (
                0,
                [
                    {
                        "TextField": "Some text",
                        "UserField": {"Id": "ID001", "Name": "Name001"},
                        "CreatedBy": {"Name": "User001"},
                        "Image": {"a": "b"},
                    },
                    {
                        "TextField": "Some text",
                        "UserField": {"Id": "ID001", "Name": "Name001"},
                        "CreatedBy": {"Name": "User001"},
                        "Image": {"a": "b"},
                    },
                ],
            ),
            (
                1,
                [
                    {
                        "TextField": "Some text",
                        "UserField.Id": "ID001",
                        "UserField.Name": "Name001",
                        "CreatedBy": {"Name": "User001"},
                        "Image": {"a": "b"},
                    },
                    {
                        "TextField": "Some text",
                        "UserField.Id": "ID001",
                        "UserField.Name": "Name001",
                        "CreatedBy": {"Name": "User001"},
                        "Image": {"a": "b"},
                    },
                ],
            ),
        ],
    )
    def test_max_level_with_records_path(self, max_level, expected):
        """``max_level`` limits flattening of the records (GH23843)."""
        lookup_entry = {
            "TextField": "Some text",
            "UserField": {"Id": "ID001", "Name": "Name001"},
        }
        test_input = [
            {
                "CreatedBy": {"Name": "User001"},
                "Lookup": [
                    lookup_entry,
                    {
                        "TextField": "Some text",
                        "UserField": {"Id": "ID001", "Name": "Name001"},
                    },
                ],
                "Image": {"a": "b"},
                "tags": [
                    {"foo": "something", "bar": "else"},
                    {"foo": "something2", "bar": "else2"},
                ],
            }
        ]

        result = json_normalize(
            test_input,
            record_path=["Lookup"],
            meta=[["CreatedBy"], ["Image"]],
            max_level=max_level,
        )

        expected_df = DataFrame(data=expected, columns=result.columns.values)
        tm.assert_equal(expected_df, result)

    def test_nested_flattening_consistent(self):
        """Flattening is the same with and without ``record_path`` (gh-21537)."""
        direct = json_normalize([{"A": {"B": 1}}])
        via_record_path = json_normalize({"dummy": [{"A": {"B": 1}}]}, "dummy")

        # They should be the same.
        tm.assert_frame_equal(direct, via_record_path)

    def test_nonetype_record_path(self, nulls_fixture):
        """A null value under ``record_path`` does not raise TypeError (gh-30148)."""
        records = [
            {"state": "Texas", "info": nulls_fixture},
            {"state": "Florida", "info": [{"i": 2}]},
        ]
        expected = DataFrame({"i": 2}, index=[0])

        result = json_normalize(records, record_path=["info"])

        tm.assert_equal(result, expected)

    @pytest.mark.parametrize("value", ["false", "true", "{}", "1", '"text"'])
    def test_non_list_record_path_errors(self, value):
        """A non-list, non-null value under ``record_path`` raises TypeError.

        See gh-30148, GH 26284.
        """
        parsed_value = json.loads(value)
        test_input = {"state": "Texas", "info": parsed_value}
        test_path = "info"
        msg = (
            f"{test_input} has non list value {parsed_value} for path {test_path}. "
            "Must be list or null."
        )
        with pytest.raises(TypeError, match=msg):
            json_normalize([test_input], record_path=[test_path])

    def test_meta_non_iterable(self):
        """An integer meta value ends up in an object-dtype column (GH 31507)."""
        data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]"""
        expected = DataFrame(
            {"one": [1], "two": [2], "id": np.array([99], dtype=object)}
        )

        result = json_normalize(json.loads(data), record_path=["data"], meta=["id"])

        tm.assert_frame_equal(result, expected)

    def test_generator(self, state_data):
        """The first element of a generator input is not skipped (GH35923)."""

        def county_stream():
            yield from state_data[0]["counties"]

        expected = DataFrame(state_data[0]["counties"])

        tm.assert_frame_equal(json_normalize(county_stream()), expected)

    def test_top_column_with_leading_underscore(self):
        """A top-level key starting with the separator keeps its prefix (49861)."""
        data = {"_id": {"a1": 10, "l2": {"l3": 0}}, "gg": 4}
        expected = DataFrame([[4, 10, 0]], columns=["gg", "_id_a1", "_id_l2_l3"])

        tm.assert_frame_equal(json_normalize(data, sep="_"), expected)
|
| 584 |
+
|
| 585 |
+
|
| 586 |
+
class TestNestedToRecord:
|
| 587 |
+
def test_flat_stays_flat(self):
|
| 588 |
+
recs = [{"flat1": 1, "flat2": 2}, {"flat3": 3, "flat2": 4}]
|
| 589 |
+
result = nested_to_record(recs)
|
| 590 |
+
expected = recs
|
| 591 |
+
assert result == expected
|
| 592 |
+
|
| 593 |
+
def test_one_level_deep_flattens(self):
|
| 594 |
+
data = {"flat1": 1, "dict1": {"c": 1, "d": 2}}
|
| 595 |
+
|
| 596 |
+
result = nested_to_record(data)
|
| 597 |
+
expected = {"dict1.c": 1, "dict1.d": 2, "flat1": 1}
|
| 598 |
+
|
| 599 |
+
assert result == expected
|
| 600 |
+
|
| 601 |
+
def test_nested_flattens(self):
|
| 602 |
+
data = {
|
| 603 |
+
"flat1": 1,
|
| 604 |
+
"dict1": {"c": 1, "d": 2},
|
| 605 |
+
"nested": {"e": {"c": 1, "d": 2}, "d": 2},
|
| 606 |
+
}
|
| 607 |
+
|
| 608 |
+
result = nested_to_record(data)
|
| 609 |
+
expected = {
|
| 610 |
+
"dict1.c": 1,
|
| 611 |
+
"dict1.d": 2,
|
| 612 |
+
"flat1": 1,
|
| 613 |
+
"nested.d": 2,
|
| 614 |
+
"nested.e.c": 1,
|
| 615 |
+
"nested.e.d": 2,
|
| 616 |
+
}
|
| 617 |
+
|
| 618 |
+
assert result == expected
|
| 619 |
+
|
| 620 |
+
def test_json_normalize_errors(self, missing_metadata):
|
| 621 |
+
# GH14583:
|
| 622 |
+
# If meta keys are not always present a new option to set
|
| 623 |
+
# errors='ignore' has been implemented
|
| 624 |
+
|
| 625 |
+
msg = (
|
| 626 |
+
"Key 'name' not found. To replace missing values of "
|
| 627 |
+
"'name' with np.nan, pass in errors='ignore'"
|
| 628 |
+
)
|
| 629 |
+
with pytest.raises(KeyError, match=msg):
|
| 630 |
+
json_normalize(
|
| 631 |
+
data=missing_metadata,
|
| 632 |
+
record_path="addresses",
|
| 633 |
+
meta="name",
|
| 634 |
+
errors="raise",
|
| 635 |
+
)
|
| 636 |
+
|
| 637 |
+
def test_missing_meta(self, missing_metadata):
|
| 638 |
+
# GH25468
|
| 639 |
+
# If metadata is nullable with errors set to ignore, the null values
|
| 640 |
+
# should be numpy.nan values
|
| 641 |
+
result = json_normalize(
|
| 642 |
+
data=missing_metadata, record_path="addresses", meta="name", errors="ignore"
|
| 643 |
+
)
|
| 644 |
+
ex_data = [
|
| 645 |
+
[9562, "Morris St.", "Massillon", "OH", 44646, "Alice"],
|
| 646 |
+
[8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan],
|
| 647 |
+
]
|
| 648 |
+
columns = ["number", "street", "city", "state", "zip", "name"]
|
| 649 |
+
expected = DataFrame(ex_data, columns=columns)
|
| 650 |
+
tm.assert_frame_equal(result, expected)
|
| 651 |
+
|
| 652 |
+
def test_missing_nested_meta(self):
|
| 653 |
+
# GH44312
|
| 654 |
+
# If errors="ignore" and nested metadata is null, we should return nan
|
| 655 |
+
data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]}
|
| 656 |
+
result = json_normalize(
|
| 657 |
+
data,
|
| 658 |
+
record_path="value",
|
| 659 |
+
meta=["meta", ["nested_meta", "leaf"]],
|
| 660 |
+
errors="ignore",
|
| 661 |
+
)
|
| 662 |
+
ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]]
|
| 663 |
+
columns = ["rec", "meta", "nested_meta.leaf"]
|
| 664 |
+
expected = DataFrame(ex_data, columns=columns).astype(
|
| 665 |
+
{"nested_meta.leaf": object}
|
| 666 |
+
)
|
| 667 |
+
tm.assert_frame_equal(result, expected)
|
| 668 |
+
|
| 669 |
+
# If errors="raise" and nested metadata is null, we should raise with the
|
| 670 |
+
# key of the first missing level
|
| 671 |
+
with pytest.raises(KeyError, match="'leaf' not found"):
|
| 672 |
+
json_normalize(
|
| 673 |
+
data,
|
| 674 |
+
record_path="value",
|
| 675 |
+
meta=["meta", ["nested_meta", "leaf"]],
|
| 676 |
+
errors="raise",
|
| 677 |
+
)
|
| 678 |
+
|
| 679 |
+
def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata):
|
| 680 |
+
# GH41876
|
| 681 |
+
# Ensure errors='raise' works as intended even when a record_path of length
|
| 682 |
+
# greater than one is passed in
|
| 683 |
+
msg = (
|
| 684 |
+
"Key 'name' not found. To replace missing values of "
|
| 685 |
+
"'name' with np.nan, pass in errors='ignore'"
|
| 686 |
+
)
|
| 687 |
+
with pytest.raises(KeyError, match=msg):
|
| 688 |
+
json_normalize(
|
| 689 |
+
data=missing_metadata,
|
| 690 |
+
record_path=["previous_residences", "cities"],
|
| 691 |
+
meta="name",
|
| 692 |
+
errors="raise",
|
| 693 |
+
)
|
| 694 |
+
|
| 695 |
+
def test_missing_meta_multilevel_record_path_errors_ignore(self, missing_metadata):
|
| 696 |
+
# GH41876
|
| 697 |
+
# Ensure errors='ignore' works as intended even when a record_path of length
|
| 698 |
+
# greater than one is passed in
|
| 699 |
+
result = json_normalize(
|
| 700 |
+
data=missing_metadata,
|
| 701 |
+
record_path=["previous_residences", "cities"],
|
| 702 |
+
meta="name",
|
| 703 |
+
errors="ignore",
|
| 704 |
+
)
|
| 705 |
+
ex_data = [
|
| 706 |
+
["Foo York City", "Alice"],
|
| 707 |
+
["Barmingham", np.nan],
|
| 708 |
+
]
|
| 709 |
+
columns = ["city_name", "name"]
|
| 710 |
+
expected = DataFrame(ex_data, columns=columns)
|
| 711 |
+
tm.assert_frame_equal(result, expected)
|
| 712 |
+
|
| 713 |
+
def test_donot_drop_nonevalues(self):
|
| 714 |
+
# GH21356
|
| 715 |
+
data = [
|
| 716 |
+
{"info": None, "author_name": {"first": "Smith", "last_name": "Appleseed"}},
|
| 717 |
+
{
|
| 718 |
+
"info": {"created_at": "11/08/1993", "last_updated": "26/05/2012"},
|
| 719 |
+
"author_name": {"first": "Jane", "last_name": "Doe"},
|
| 720 |
+
},
|
| 721 |
+
]
|
| 722 |
+
result = nested_to_record(data)
|
| 723 |
+
expected = [
|
| 724 |
+
{
|
| 725 |
+
"info": None,
|
| 726 |
+
"author_name.first": "Smith",
|
| 727 |
+
"author_name.last_name": "Appleseed",
|
| 728 |
+
},
|
| 729 |
+
{
|
| 730 |
+
"author_name.first": "Jane",
|
| 731 |
+
"author_name.last_name": "Doe",
|
| 732 |
+
"info.created_at": "11/08/1993",
|
| 733 |
+
"info.last_updated": "26/05/2012",
|
| 734 |
+
},
|
| 735 |
+
]
|
| 736 |
+
|
| 737 |
+
assert result == expected
|
| 738 |
+
|
| 739 |
+
def test_nonetype_top_level_bottom_level(self):
|
| 740 |
+
# GH21158: If inner level json has a key with a null value
|
| 741 |
+
# make sure it does not do a new_d.pop twice and except
|
| 742 |
+
data = {
|
| 743 |
+
"id": None,
|
| 744 |
+
"location": {
|
| 745 |
+
"country": {
|
| 746 |
+
"state": {
|
| 747 |
+
"id": None,
|
| 748 |
+
"town.info": {
|
| 749 |
+
"id": None,
|
| 750 |
+
"region": None,
|
| 751 |
+
"x": 49.151580810546875,
|
| 752 |
+
"y": -33.148521423339844,
|
| 753 |
+
"z": 27.572303771972656,
|
| 754 |
+
},
|
| 755 |
+
}
|
| 756 |
+
}
|
| 757 |
+
},
|
| 758 |
+
}
|
| 759 |
+
result = nested_to_record(data)
|
| 760 |
+
expected = {
|
| 761 |
+
"id": None,
|
| 762 |
+
"location.country.state.id": None,
|
| 763 |
+
"location.country.state.town.info.id": None,
|
| 764 |
+
"location.country.state.town.info.region": None,
|
| 765 |
+
"location.country.state.town.info.x": 49.151580810546875,
|
| 766 |
+
"location.country.state.town.info.y": -33.148521423339844,
|
| 767 |
+
"location.country.state.town.info.z": 27.572303771972656,
|
| 768 |
+
}
|
| 769 |
+
assert result == expected
|
| 770 |
+
|
| 771 |
+
def test_nonetype_multiple_levels(self):
|
| 772 |
+
# GH21158: If inner level json has a key with a null value
|
| 773 |
+
# make sure it does not do a new_d.pop twice and except
|
| 774 |
+
data = {
|
| 775 |
+
"id": None,
|
| 776 |
+
"location": {
|
| 777 |
+
"id": None,
|
| 778 |
+
"country": {
|
| 779 |
+
"id": None,
|
| 780 |
+
"state": {
|
| 781 |
+
"id": None,
|
| 782 |
+
"town.info": {
|
| 783 |
+
"region": None,
|
| 784 |
+
"x": 49.151580810546875,
|
| 785 |
+
"y": -33.148521423339844,
|
| 786 |
+
"z": 27.572303771972656,
|
| 787 |
+
},
|
| 788 |
+
},
|
| 789 |
+
},
|
| 790 |
+
},
|
| 791 |
+
}
|
| 792 |
+
result = nested_to_record(data)
|
| 793 |
+
expected = {
|
| 794 |
+
"id": None,
|
| 795 |
+
"location.id": None,
|
| 796 |
+
"location.country.id": None,
|
| 797 |
+
"location.country.state.id": None,
|
| 798 |
+
"location.country.state.town.info.region": None,
|
| 799 |
+
"location.country.state.town.info.x": 49.151580810546875,
|
| 800 |
+
"location.country.state.town.info.y": -33.148521423339844,
|
| 801 |
+
"location.country.state.town.info.z": 27.572303771972656,
|
| 802 |
+
}
|
| 803 |
+
assert result == expected
|
| 804 |
+
|
| 805 |
+
@pytest.mark.parametrize(
|
| 806 |
+
"max_level, expected",
|
| 807 |
+
[
|
| 808 |
+
(
|
| 809 |
+
None,
|
| 810 |
+
[
|
| 811 |
+
{
|
| 812 |
+
"CreatedBy.Name": "User001",
|
| 813 |
+
"Lookup.TextField": "Some text",
|
| 814 |
+
"Lookup.UserField.Id": "ID001",
|
| 815 |
+
"Lookup.UserField.Name": "Name001",
|
| 816 |
+
"Image.a": "b",
|
| 817 |
+
}
|
| 818 |
+
],
|
| 819 |
+
),
|
| 820 |
+
(
|
| 821 |
+
0,
|
| 822 |
+
[
|
| 823 |
+
{
|
| 824 |
+
"CreatedBy": {"Name": "User001"},
|
| 825 |
+
"Lookup": {
|
| 826 |
+
"TextField": "Some text",
|
| 827 |
+
"UserField": {"Id": "ID001", "Name": "Name001"},
|
| 828 |
+
},
|
| 829 |
+
"Image": {"a": "b"},
|
| 830 |
+
}
|
| 831 |
+
],
|
| 832 |
+
),
|
| 833 |
+
(
|
| 834 |
+
1,
|
| 835 |
+
[
|
| 836 |
+
{
|
| 837 |
+
"CreatedBy.Name": "User001",
|
| 838 |
+
"Lookup.TextField": "Some text",
|
| 839 |
+
"Lookup.UserField": {"Id": "ID001", "Name": "Name001"},
|
| 840 |
+
"Image.a": "b",
|
| 841 |
+
}
|
| 842 |
+
],
|
| 843 |
+
),
|
| 844 |
+
],
|
| 845 |
+
)
|
| 846 |
+
def test_with_max_level(self, max_level, expected, max_level_test_input_data):
|
| 847 |
+
# GH23843: Enhanced JSON normalize
|
| 848 |
+
output = nested_to_record(max_level_test_input_data, max_level=max_level)
|
| 849 |
+
assert output == expected
|
| 850 |
+
|
| 851 |
+
def test_with_large_max_level(self):
|
| 852 |
+
# GH23843: Enhanced JSON normalize
|
| 853 |
+
max_level = 100
|
| 854 |
+
input_data = [
|
| 855 |
+
{
|
| 856 |
+
"CreatedBy": {
|
| 857 |
+
"user": {
|
| 858 |
+
"name": {"firstname": "Leo", "LastName": "Thomson"},
|
| 859 |
+
"family_tree": {
|
| 860 |
+
"father": {
|
| 861 |
+
"name": "Father001",
|
| 862 |
+
"father": {
|
| 863 |
+
"Name": "Father002",
|
| 864 |
+
"father": {
|
| 865 |
+
"name": "Father003",
|
| 866 |
+
"father": {"Name": "Father004"},
|
| 867 |
+
},
|
| 868 |
+
},
|
| 869 |
+
}
|
| 870 |
+
},
|
| 871 |
+
}
|
| 872 |
+
}
|
| 873 |
+
}
|
| 874 |
+
]
|
| 875 |
+
expected = [
|
| 876 |
+
{
|
| 877 |
+
"CreatedBy.user.name.firstname": "Leo",
|
| 878 |
+
"CreatedBy.user.name.LastName": "Thomson",
|
| 879 |
+
"CreatedBy.user.family_tree.father.name": "Father001",
|
| 880 |
+
"CreatedBy.user.family_tree.father.father.Name": "Father002",
|
| 881 |
+
"CreatedBy.user.family_tree.father.father.father.name": "Father003",
|
| 882 |
+
"CreatedBy.user.family_tree.father.father.father.father.Name": "Father004", # noqa: E501
|
| 883 |
+
}
|
| 884 |
+
]
|
| 885 |
+
output = nested_to_record(input_data, max_level=max_level)
|
| 886 |
+
assert output == expected
|
| 887 |
+
|
| 888 |
+
def test_series_non_zero_index(self):
|
| 889 |
+
# GH 19020
|
| 890 |
+
data = {
|
| 891 |
+
0: {"id": 1, "name": "Foo", "elements": {"a": 1}},
|
| 892 |
+
1: {"id": 2, "name": "Bar", "elements": {"b": 2}},
|
| 893 |
+
2: {"id": 3, "name": "Baz", "elements": {"c": 3}},
|
| 894 |
+
}
|
| 895 |
+
s = Series(data)
|
| 896 |
+
s.index = [1, 2, 3]
|
| 897 |
+
result = json_normalize(s)
|
| 898 |
+
expected = DataFrame(
|
| 899 |
+
{
|
| 900 |
+
"id": [1, 2, 3],
|
| 901 |
+
"name": ["Foo", "Bar", "Baz"],
|
| 902 |
+
"elements.a": [1.0, np.nan, np.nan],
|
| 903 |
+
"elements.b": [np.nan, 2.0, np.nan],
|
| 904 |
+
"elements.c": [np.nan, np.nan, 3.0],
|
| 905 |
+
}
|
| 906 |
+
)
|
| 907 |
+
tm.assert_frame_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_pandas.py
ADDED
|
@@ -0,0 +1,2188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime
|
| 2 |
+
from datetime import timedelta
|
| 3 |
+
from decimal import Decimal
|
| 4 |
+
from io import (
|
| 5 |
+
BytesIO,
|
| 6 |
+
StringIO,
|
| 7 |
+
)
|
| 8 |
+
import json
|
| 9 |
+
import os
|
| 10 |
+
import sys
|
| 11 |
+
import time
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
import pytest
|
| 15 |
+
|
| 16 |
+
from pandas._config import using_string_dtype
|
| 17 |
+
|
| 18 |
+
from pandas.compat import IS64
|
| 19 |
+
import pandas.util._test_decorators as td
|
| 20 |
+
|
| 21 |
+
import pandas as pd
|
| 22 |
+
from pandas import (
|
| 23 |
+
NA,
|
| 24 |
+
DataFrame,
|
| 25 |
+
DatetimeIndex,
|
| 26 |
+
Index,
|
| 27 |
+
RangeIndex,
|
| 28 |
+
Series,
|
| 29 |
+
Timestamp,
|
| 30 |
+
date_range,
|
| 31 |
+
read_json,
|
| 32 |
+
)
|
| 33 |
+
import pandas._testing as tm
|
| 34 |
+
|
| 35 |
+
from pandas.io.json import ujson_dumps
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def test_literal_json_deprecation():
|
| 39 |
+
# PR 53409
|
| 40 |
+
expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
|
| 41 |
+
|
| 42 |
+
jsonl = """{"a": 1, "b": 2}
|
| 43 |
+
{"a": 3, "b": 4}
|
| 44 |
+
{"a": 5, "b": 6}
|
| 45 |
+
{"a": 7, "b": 8}"""
|
| 46 |
+
|
| 47 |
+
msg = (
|
| 48 |
+
"Passing literal json to 'read_json' is deprecated and "
|
| 49 |
+
"will be removed in a future version. To read from a "
|
| 50 |
+
"literal string, wrap it in a 'StringIO' object."
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 54 |
+
try:
|
| 55 |
+
read_json(jsonl, lines=False)
|
| 56 |
+
except ValueError:
|
| 57 |
+
pass
|
| 58 |
+
|
| 59 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 60 |
+
read_json(expected.to_json(), lines=False)
|
| 61 |
+
|
| 62 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 63 |
+
result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
|
| 64 |
+
tm.assert_frame_equal(result, expected)
|
| 65 |
+
|
| 66 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 67 |
+
try:
|
| 68 |
+
result = read_json(
|
| 69 |
+
'{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n',
|
| 70 |
+
lines=False,
|
| 71 |
+
)
|
| 72 |
+
except ValueError:
|
| 73 |
+
pass
|
| 74 |
+
|
| 75 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 76 |
+
try:
|
| 77 |
+
result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=False)
|
| 78 |
+
except ValueError:
|
| 79 |
+
pass
|
| 80 |
+
tm.assert_frame_equal(result, expected)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def assert_json_roundtrip_equal(result, expected, orient):
|
| 84 |
+
if orient in ("records", "values"):
|
| 85 |
+
expected = expected.reset_index(drop=True)
|
| 86 |
+
if orient == "values":
|
| 87 |
+
expected.columns = range(len(expected.columns))
|
| 88 |
+
tm.assert_frame_equal(result, expected)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
class TestPandasContainer:
|
| 92 |
+
@pytest.fixture
|
| 93 |
+
def categorical_frame(self):
|
| 94 |
+
data = {
|
| 95 |
+
c: np.random.default_rng(i).standard_normal(30)
|
| 96 |
+
for i, c in enumerate(list("ABCD"))
|
| 97 |
+
}
|
| 98 |
+
cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * 15
|
| 99 |
+
data["E"] = list(reversed(cat))
|
| 100 |
+
data["sort"] = np.arange(30, dtype="int64")
|
| 101 |
+
return DataFrame(data, index=pd.CategoricalIndex(cat, name="E"))
|
| 102 |
+
|
| 103 |
+
@pytest.fixture
|
| 104 |
+
def datetime_series(self):
|
| 105 |
+
# Same as usual datetime_series, but with index freq set to None,
|
| 106 |
+
# since that doesn't round-trip, see GH#33711
|
| 107 |
+
ser = Series(
|
| 108 |
+
1.1 * np.arange(10, dtype=np.float64),
|
| 109 |
+
index=date_range("2020-01-01", periods=10),
|
| 110 |
+
name="ts",
|
| 111 |
+
)
|
| 112 |
+
ser.index = ser.index._with_freq(None)
|
| 113 |
+
return ser
|
| 114 |
+
|
| 115 |
+
@pytest.fixture
|
| 116 |
+
def datetime_frame(self):
|
| 117 |
+
# Same as usual datetime_frame, but with index freq set to None,
|
| 118 |
+
# since that doesn't round-trip, see GH#33711
|
| 119 |
+
df = DataFrame(
|
| 120 |
+
np.random.default_rng(2).standard_normal((30, 4)),
|
| 121 |
+
columns=Index(list("ABCD")),
|
| 122 |
+
index=date_range("2000-01-01", periods=30, freq="B"),
|
| 123 |
+
)
|
| 124 |
+
df.index = df.index._with_freq(None)
|
| 125 |
+
return df
|
| 126 |
+
|
| 127 |
+
def test_frame_double_encoded_labels(self, orient):
|
| 128 |
+
df = DataFrame(
|
| 129 |
+
[["a", "b"], ["c", "d"]],
|
| 130 |
+
index=['index " 1', "index / 2"],
|
| 131 |
+
columns=["a \\ b", "y / z"],
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
data = StringIO(df.to_json(orient=orient))
|
| 135 |
+
result = read_json(data, orient=orient)
|
| 136 |
+
expected = df.copy()
|
| 137 |
+
assert_json_roundtrip_equal(result, expected, orient)
|
| 138 |
+
|
| 139 |
+
@pytest.mark.parametrize("orient", ["split", "records", "values"])
|
| 140 |
+
def test_frame_non_unique_index(self, orient):
|
| 141 |
+
df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
|
| 142 |
+
data = StringIO(df.to_json(orient=orient))
|
| 143 |
+
result = read_json(data, orient=orient)
|
| 144 |
+
expected = df.copy()
|
| 145 |
+
|
| 146 |
+
assert_json_roundtrip_equal(result, expected, orient)
|
| 147 |
+
|
| 148 |
+
@pytest.mark.parametrize("orient", ["index", "columns"])
|
| 149 |
+
def test_frame_non_unique_index_raises(self, orient):
|
| 150 |
+
df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
|
| 151 |
+
msg = f"DataFrame index must be unique for orient='{orient}'"
|
| 152 |
+
with pytest.raises(ValueError, match=msg):
|
| 153 |
+
df.to_json(orient=orient)
|
| 154 |
+
|
| 155 |
+
@pytest.mark.parametrize("orient", ["split", "values"])
|
| 156 |
+
@pytest.mark.parametrize(
|
| 157 |
+
"data",
|
| 158 |
+
[
|
| 159 |
+
[["a", "b"], ["c", "d"]],
|
| 160 |
+
[[1.5, 2.5], [3.5, 4.5]],
|
| 161 |
+
[[1, 2.5], [3, 4.5]],
|
| 162 |
+
[[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
|
| 163 |
+
],
|
| 164 |
+
)
|
| 165 |
+
def test_frame_non_unique_columns(self, orient, data):
|
| 166 |
+
df = DataFrame(data, index=[1, 2], columns=["x", "x"])
|
| 167 |
+
|
| 168 |
+
result = read_json(
|
| 169 |
+
StringIO(df.to_json(orient=orient)), orient=orient, convert_dates=["x"]
|
| 170 |
+
)
|
| 171 |
+
if orient == "values":
|
| 172 |
+
expected = DataFrame(data)
|
| 173 |
+
if expected.iloc[:, 0].dtype == "datetime64[ns]":
|
| 174 |
+
# orient == "values" by default will write Timestamp objects out
|
| 175 |
+
# in milliseconds; these are internally stored in nanosecond,
|
| 176 |
+
# so divide to get where we need
|
| 177 |
+
# TODO: a to_epoch method would also solve; see GH 14772
|
| 178 |
+
expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000)
|
| 179 |
+
elif orient == "split":
|
| 180 |
+
expected = df
|
| 181 |
+
expected.columns = ["x", "x.1"]
|
| 182 |
+
|
| 183 |
+
tm.assert_frame_equal(result, expected)
|
| 184 |
+
|
| 185 |
+
@pytest.mark.parametrize("orient", ["index", "columns", "records"])
|
| 186 |
+
def test_frame_non_unique_columns_raises(self, orient):
|
| 187 |
+
df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])
|
| 188 |
+
|
| 189 |
+
msg = f"DataFrame columns must be unique for orient='{orient}'"
|
| 190 |
+
with pytest.raises(ValueError, match=msg):
|
| 191 |
+
df.to_json(orient=orient)
|
| 192 |
+
|
| 193 |
+
def test_frame_default_orient(self, float_frame):
|
| 194 |
+
assert float_frame.to_json() == float_frame.to_json(orient="columns")
|
| 195 |
+
|
| 196 |
+
@pytest.mark.parametrize("dtype", [False, float])
|
| 197 |
+
@pytest.mark.parametrize("convert_axes", [True, False])
|
| 198 |
+
def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame):
|
| 199 |
+
data = StringIO(float_frame.to_json(orient=orient))
|
| 200 |
+
result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
|
| 201 |
+
|
| 202 |
+
expected = float_frame
|
| 203 |
+
|
| 204 |
+
assert_json_roundtrip_equal(result, expected, orient)
|
| 205 |
+
|
| 206 |
+
@pytest.mark.parametrize("dtype", [False, np.int64])
|
| 207 |
+
@pytest.mark.parametrize("convert_axes", [True, False])
|
| 208 |
+
def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame):
|
| 209 |
+
data = StringIO(int_frame.to_json(orient=orient))
|
| 210 |
+
result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
|
| 211 |
+
expected = int_frame
|
| 212 |
+
assert_json_roundtrip_equal(result, expected, orient)
|
| 213 |
+
|
| 214 |
+
@pytest.mark.parametrize("dtype", [None, np.float64, int, "U3"])
@pytest.mark.parametrize("convert_axes", [True, False])
def test_roundtrip_str_axes(self, orient, convert_axes, dtype):
    """Round-trip a frame whose row and column labels are numeric strings."""
    n_rows, n_cols = 200, 4
    df = DataFrame(
        np.zeros((n_rows, n_cols)),
        columns=list(map(str, range(n_cols))),
        index=list(map(str, range(n_rows))),
        dtype=dtype,
    )

    payload = StringIO(df.to_json(orient=orient))
    result = read_json(
        payload, orient=orient, convert_axes=convert_axes, dtype=dtype
    )

    expected = df.copy()
    if not dtype:
        expected = expected.astype(np.int64)

    # index, columns, and records orients cannot fully preserve the string
    # dtype for axes as the index and column labels are used as keys in
    # JSON objects. JSON keys are by definition strings, so there's no way
    # to disambiguate whether those keys actually were strings or numeric
    # beforehand and numeric wins out.
    if convert_axes:
        if orient in ("index", "columns", "records", "split"):
            expected.columns = expected.columns.astype(np.int64)
        if orient in ("index", "columns"):
            expected.index = expected.index.astype(np.int64)

    assert_json_roundtrip_equal(result, expected, orient)
|
| 245 |
+
|
| 246 |
+
@pytest.mark.parametrize("convert_axes", [True, False])
def test_roundtrip_categorical(
    self, request, orient, categorical_frame, convert_axes, using_infer_string
):
    """Round-trip a frame with a categorical index; the category dtype is lost.

    The ``index`` and ``columns`` orients are marked as expected failures.
    """
    # TODO: create a better frame to test with and improve coverage
    if orient in ("index", "columns"):
        request.applymarker(
            pytest.mark.xfail(
                reason=f"Can't have duplicate index values for orient '{orient}'"
            )
        )

    data = StringIO(categorical_frame.to_json(orient=orient))
    result = read_json(data, orient=orient, convert_axes=convert_axes)

    expected = categorical_frame.copy()
    # Categorical not preserved: the index is expected back as strings.
    index_dtype = "str" if using_infer_string else str
    expected.index = expected.index.astype(index_dtype)
    expected.index.name = None  # index names aren't preserved in JSON
    assert_json_roundtrip_equal(result, expected, orient)
|
| 267 |
+
|
| 268 |
+
@pytest.mark.parametrize("convert_axes", [True, False])
def test_roundtrip_empty(self, orient, convert_axes):
    """Round-trip an empty frame and check the reconstructed axes."""
    empty_frame = DataFrame()
    payload = StringIO(empty_frame.to_json(orient=orient))
    result = read_json(payload, orient=orient, convert_axes=convert_axes)

    if orient == "split":
        # "split" is expected to yield empty axes whose dtype depends on
        # whether axis conversion ran.
        axis_dtype = float if convert_axes else object
        empty_axis = Index([], dtype=axis_dtype)
        expected = DataFrame(index=empty_axis, columns=empty_axis)
    elif orient in ("index", "columns"):
        expected = DataFrame()
    else:
        expected = empty_frame.copy()

    tm.assert_frame_equal(result, expected)
|
| 282 |
+
|
| 283 |
+
@pytest.mark.parametrize("convert_axes", [True, False])
def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame):
    """Round-trip a frame indexed by timestamps."""
    # TODO: improve coverage with date_format parameter
    payload = StringIO(datetime_frame.to_json(orient=orient))
    result = read_json(payload, orient=orient, convert_axes=convert_axes)

    expected = datetime_frame.copy()
    if not convert_axes:  # one off for ts handling
        # DTI gets converted to epoch values
        epoch_values = expected.index.view(np.int64) // 1000000
        # TODO: handle consistently across orients
        if orient == "split":
            expected.index = epoch_values
        else:
            expected.index = epoch_values.astype(str)

    assert_json_roundtrip_equal(result, expected, orient)
|
| 299 |
+
|
| 300 |
+
@pytest.mark.parametrize("convert_axes", [True, False])
def test_roundtrip_mixed(self, orient, convert_axes):
    """Round-trip a frame mixing float, string and boolean columns."""
    labels = Index(["a", "b", "c", "d", "e"])
    columns = {
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": [True, False, True, False, True],
    }
    df = DataFrame(data=columns, index=labels)

    payload = StringIO(df.to_json(orient=orient))
    result = read_json(payload, orient=orient, convert_axes=convert_axes)

    # The numeric columns are expected back as int64.
    expected = df.copy()
    numeric_as_int = expected.select_dtypes("number").astype(np.int64)
    expected = expected.assign(**numeric_as_int)

    assert_json_roundtrip_equal(result, expected, orient)
|
| 319 |
+
|
| 320 |
+
@pytest.mark.xfail(
    reason="#50456 Column multiindex is stored and loaded differently",
    raises=AssertionError,
)
@pytest.mark.parametrize(
    "columns",
    [
        [["2022", "2022"], ["JAN", "FEB"]],
        [["2022", "2023"], ["JAN", "JAN"]],
        [["2022", "2022"], ["JAN", "JAN"]],
    ],
)
def test_roundtrip_multiindex(self, columns):
    """Round-trip MultiIndex columns via ``orient="split"`` (expected failure)."""
    column_index = pd.MultiIndex.from_arrays(columns)
    frame = DataFrame([[1, 2], [3, 4]], columns=column_index)

    payload = StringIO(frame.to_json(orient="split"))
    roundtripped = read_json(payload, orient="split")

    tm.assert_frame_equal(roundtripped, frame)
|
| 340 |
+
|
| 341 |
+
@pytest.mark.parametrize(
    "data,msg,orient",
    [
        ('{"key":b:a:d}', "Expected object or value", "columns"),
        # too few indices
        (
            '{"columns":["A","B"],'
            '"index":["2","3"],'
            '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
            r"Length of values \(3\) does not match length of index \(2\)",
            "split",
        ),
        # too many columns
        (
            '{"columns":["A","B","C"],'
            '"index":["1","2","3"],'
            '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
            "3 columns passed, passed data had 2 columns",
            "split",
        ),
        # bad key
        (
            '{"badkey":["A","B"],'
            '"index":["2","3"],'
            '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
            r"unexpected key\(s\): badkey",
            "split",
        ),
    ],
)
def test_frame_from_json_bad_data_raises(self, data, msg, orient):
    """Malformed or inconsistent JSON input raises ``ValueError``."""
    malformed = StringIO(data)
    with pytest.raises(ValueError, match=msg):
        read_json(malformed, orient=orient)
|
| 378 |
+
|
| 379 |
+
@pytest.mark.parametrize("dtype", [True, False])
@pytest.mark.parametrize("convert_axes", [True, False])
def test_frame_from_json_missing_data(self, orient, convert_axes, dtype):
    """Cells missing from ragged input rows are read back as NaN."""
    ragged_inputs = (
        [[1, 2], [4, 5, 6]],  # numeric values
        [["1", "2"], ["4", "5", "6"]],  # string values
    )
    for rows in ragged_inputs:
        frame = DataFrame(rows)
        result = read_json(
            StringIO(frame.to_json(orient=orient)),
            orient=orient,
            convert_axes=convert_axes,
            dtype=dtype,
        )
        # The first row is one element short, so position (0, 2) is missing.
        assert np.isnan(result.iloc[0, 2])
|
| 400 |
+
|
| 401 |
+
@pytest.mark.parametrize("dtype", [True, False])
def test_frame_read_json_dtype_missing_value(self, dtype):
    """A JSON ``null`` is read as NaN for both ``dtype`` settings."""
    # GH28501 Parse missing values using read_json with dtype=False
    # to NaN instead of None
    parsed = read_json(StringIO("[null]"), dtype=dtype)

    tm.assert_frame_equal(parsed, DataFrame([np.nan]))
|
| 409 |
+
|
| 410 |
+
@pytest.mark.parametrize("inf", [np.inf, -np.inf])
@pytest.mark.parametrize("dtype", [True, False])
def test_frame_infinity(self, inf, dtype):
    """Infinite values are read back as NaN after a JSON round trip."""
    # infinities get mapped to nulls which get mapped to NaNs during
    # deserialisation
    frame = DataFrame([[1, 2], [4, 5, 6]])
    frame.loc[0, 2] = inf

    roundtripped = read_json(StringIO(frame.to_json()), dtype=dtype)

    assert np.isnan(roundtripped.iloc[0, 2])
|
| 421 |
+
|
| 422 |
+
@pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
@pytest.mark.parametrize(
    "value,precision,expected_val",
    [
        (0.95, 1, 1.0),
        (1.95, 1, 2.0),
        (-1.95, 1, -2.0),
        (0.995, 2, 1.0),
        (0.9995, 3, 1.0),
        (0.99999999999999944, 15, 1.0),
    ],
)
def test_frame_to_json_float_precision(self, value, precision, expected_val):
    """``double_precision`` controls how floats are rounded when encoded."""
    frame = DataFrame([{"a_float": value}])
    encoded = frame.to_json(double_precision=precision)

    expected = f'{{"a_float":{{"0":{expected_val}}}}}'
    assert encoded == expected
|
| 438 |
+
|
| 439 |
+
def test_frame_to_json_except(self):
    """An unknown ``orient`` is rejected by ``DataFrame.to_json``."""
    frame = DataFrame([1, 2, 3])
    expected_error = "Invalid value 'garbage' for option 'orient'"

    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(orient="garbage")
|
| 444 |
+
|
| 445 |
+
def test_frame_empty(self):
|
| 446 |
+
df = DataFrame(columns=["jim", "joe"])
|
| 447 |
+
assert not df._is_mixed_type
|
| 448 |
+
|
| 449 |
+
data = StringIO(df.to_json())
|
| 450 |
+
result = read_json(data, dtype=dict(df.dtypes))
|
| 451 |
+
tm.assert_frame_equal(result, df, check_index_type=False)
|
| 452 |
+
|
| 453 |
+
def test_frame_empty_to_json(self):
|
| 454 |
+
# GH 7445
|
| 455 |
+
df = DataFrame({"test": []}, index=[])
|
| 456 |
+
result = df.to_json(orient="columns")
|
| 457 |
+
expected = '{"test":{}}'
|
| 458 |
+
assert result == expected
|
| 459 |
+
|
| 460 |
+
def test_frame_empty_mixedtype(self):
|
| 461 |
+
# mixed type
|
| 462 |
+
df = DataFrame(columns=["jim", "joe"])
|
| 463 |
+
df["joe"] = df["joe"].astype("i8")
|
| 464 |
+
assert df._is_mixed_type
|
| 465 |
+
data = df.to_json()
|
| 466 |
+
tm.assert_frame_equal(
|
| 467 |
+
read_json(StringIO(data), dtype=dict(df.dtypes)),
|
| 468 |
+
df,
|
| 469 |
+
check_index_type=False,
|
| 470 |
+
)
|
| 471 |
+
|
| 472 |
+
def test_frame_mixedtype_orient(self): # GH10289
|
| 473 |
+
vals = [
|
| 474 |
+
[10, 1, "foo", 0.1, 0.01],
|
| 475 |
+
[20, 2, "bar", 0.2, 0.02],
|
| 476 |
+
[30, 3, "baz", 0.3, 0.03],
|
| 477 |
+
[40, 4, "qux", 0.4, 0.04],
|
| 478 |
+
]
|
| 479 |
+
|
| 480 |
+
df = DataFrame(
|
| 481 |
+
vals, index=list("abcd"), columns=["1st", "2nd", "3rd", "4th", "5th"]
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
assert df._is_mixed_type
|
| 485 |
+
right = df.copy()
|
| 486 |
+
|
| 487 |
+
for orient in ["split", "index", "columns"]:
|
| 488 |
+
inp = StringIO(df.to_json(orient=orient))
|
| 489 |
+
left = read_json(inp, orient=orient, convert_axes=False)
|
| 490 |
+
tm.assert_frame_equal(left, right)
|
| 491 |
+
|
| 492 |
+
right.index = RangeIndex(len(df))
|
| 493 |
+
inp = StringIO(df.to_json(orient="records"))
|
| 494 |
+
left = read_json(inp, orient="records", convert_axes=False)
|
| 495 |
+
tm.assert_frame_equal(left, right)
|
| 496 |
+
|
| 497 |
+
right.columns = RangeIndex(df.shape[1])
|
| 498 |
+
inp = StringIO(df.to_json(orient="values"))
|
| 499 |
+
left = read_json(inp, orient="values", convert_axes=False)
|
| 500 |
+
tm.assert_frame_equal(left, right)
|
| 501 |
+
|
| 502 |
+
def test_v12_compat(self, datapath):
    """Frames stored in the ``*_v012.json`` data files still load as expected."""
    # freq doesn't roundtrip
    dates = DatetimeIndex(
        np.asarray(date_range("2000-01-03", "2000-01-07")), freq=None
    )
    values = [
        [1.56808523, 0.65727391, 1.81021139, -0.17251653],
        [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
        [1.51493992, 0.11805825, 1.629455, -1.31506612],
        [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
        [0.05951614, -2.69652057, 1.28163262, 0.34703478],
    ]
    df = DataFrame(values, columns=["A", "B", "C", "D"], index=dates)
    df["date"] = Timestamp("19920106 18:21:32.12").as_unit("ns")
    df.iloc[3, df.columns.get_loc("date")] = Timestamp("20130101")
    df["modified"] = df["date"]
    df.iloc[1, df.columns.get_loc("modified")] = pd.NaT

    data_dir = datapath("io", "json", "data")

    # Epoch-formatted file.
    loaded = read_json(os.path.join(data_dir, "tsframe_v012.json"))
    tm.assert_frame_equal(df, loaded)

    # ISO-formatted file; it is compared without the "modified" column.
    expected_iso = df.drop(["modified"], axis=1)
    loaded_iso = read_json(os.path.join(data_dir, "tsframe_iso_v012.json"))
    tm.assert_frame_equal(expected_iso, loaded_iso, check_column_type=False)
|
| 531 |
+
|
| 532 |
+
def test_blocks_compat_GH9037(self, using_infer_string):
|
| 533 |
+
index = date_range("20000101", periods=10, freq="h")
|
| 534 |
+
# freq doesn't round-trip
|
| 535 |
+
index = DatetimeIndex(list(index), freq=None)
|
| 536 |
+
|
| 537 |
+
df_mixed = DataFrame(
|
| 538 |
+
{
|
| 539 |
+
"float_1": [
|
| 540 |
+
-0.92077639,
|
| 541 |
+
0.77434435,
|
| 542 |
+
1.25234727,
|
| 543 |
+
0.61485564,
|
| 544 |
+
-0.60316077,
|
| 545 |
+
0.24653374,
|
| 546 |
+
0.28668979,
|
| 547 |
+
-2.51969012,
|
| 548 |
+
0.95748401,
|
| 549 |
+
-1.02970536,
|
| 550 |
+
],
|
| 551 |
+
"int_1": [
|
| 552 |
+
19680418,
|
| 553 |
+
75337055,
|
| 554 |
+
99973684,
|
| 555 |
+
65103179,
|
| 556 |
+
79373900,
|
| 557 |
+
40314334,
|
| 558 |
+
21290235,
|
| 559 |
+
4991321,
|
| 560 |
+
41903419,
|
| 561 |
+
16008365,
|
| 562 |
+
],
|
| 563 |
+
"str_1": [
|
| 564 |
+
"78c608f1",
|
| 565 |
+
"64a99743",
|
| 566 |
+
"13d2ff52",
|
| 567 |
+
"ca7f4af2",
|
| 568 |
+
"97236474",
|
| 569 |
+
"bde7e214",
|
| 570 |
+
"1a6bde47",
|
| 571 |
+
"b1190be5",
|
| 572 |
+
"7a669144",
|
| 573 |
+
"8d64d068",
|
| 574 |
+
],
|
| 575 |
+
"float_2": [
|
| 576 |
+
-0.0428278,
|
| 577 |
+
-1.80872357,
|
| 578 |
+
3.36042349,
|
| 579 |
+
-0.7573685,
|
| 580 |
+
-0.48217572,
|
| 581 |
+
0.86229683,
|
| 582 |
+
1.08935819,
|
| 583 |
+
0.93898739,
|
| 584 |
+
-0.03030452,
|
| 585 |
+
1.43366348,
|
| 586 |
+
],
|
| 587 |
+
"str_2": [
|
| 588 |
+
"14f04af9",
|
| 589 |
+
"d085da90",
|
| 590 |
+
"4bcfac83",
|
| 591 |
+
"81504caf",
|
| 592 |
+
"2ffef4a9",
|
| 593 |
+
"08e2f5c4",
|
| 594 |
+
"07e1af03",
|
| 595 |
+
"addbd4a7",
|
| 596 |
+
"1f6a09ba",
|
| 597 |
+
"4bfc4d87",
|
| 598 |
+
],
|
| 599 |
+
"int_2": [
|
| 600 |
+
86967717,
|
| 601 |
+
98098830,
|
| 602 |
+
51927505,
|
| 603 |
+
20372254,
|
| 604 |
+
12601730,
|
| 605 |
+
20884027,
|
| 606 |
+
34193846,
|
| 607 |
+
10561746,
|
| 608 |
+
24867120,
|
| 609 |
+
76131025,
|
| 610 |
+
],
|
| 611 |
+
},
|
| 612 |
+
index=index,
|
| 613 |
+
)
|
| 614 |
+
|
| 615 |
+
# JSON deserialisation always creates unicode strings
|
| 616 |
+
df_mixed.columns = df_mixed.columns.astype(
|
| 617 |
+
np.str_ if not using_infer_string else "str"
|
| 618 |
+
)
|
| 619 |
+
data = StringIO(df_mixed.to_json(orient="split"))
|
| 620 |
+
df_roundtrip = read_json(data, orient="split")
|
| 621 |
+
tm.assert_frame_equal(
|
| 622 |
+
df_mixed,
|
| 623 |
+
df_roundtrip,
|
| 624 |
+
check_index_type=True,
|
| 625 |
+
check_column_type=True,
|
| 626 |
+
by_blocks=True,
|
| 627 |
+
check_exact=True,
|
| 628 |
+
)
|
| 629 |
+
|
| 630 |
+
def test_frame_nonprintable_bytes(self):
    """Objects holding non-printable bytes fail cleanly unless a handler is given."""
    # GH14256: failing column caused segfaults, if it is not the last one

    class BinaryThing:
        def __init__(self, hexed) -> None:
            self.hexed = hexed
            self.binary = bytes.fromhex(hexed)

        def __str__(self) -> str:
            return self.hexed

    hexed = "574b4454ba8c5eb4f98a8f45"
    binthing = BinaryThing(hexed)
    single_column_json = f'{{"A":{{"0":"{hexed}"}}}}'

    # verify the proper conversion of printable content
    printable = DataFrame({"A": [binthing.hexed]})
    assert printable.to_json() == single_column_json

    # check if non-printable content throws appropriate Exception
    nonprintable = DataFrame({"A": [binthing]})
    msg = "Unsupported UTF-8 sequence length when encoding string"
    with pytest.raises(OverflowError, match=msg):
        nonprintable.to_json()

    # the same with multiple columns threw segfaults
    two_columns = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"])
    with pytest.raises(OverflowError, match=msg):
        two_columns.to_json()

    # default_handler should resolve exceptions for non-string types
    assert nonprintable.to_json(default_handler=str) == single_column_json
    two_column_json = f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}'
    assert two_columns.to_json(default_handler=str) == two_column_json
|
| 667 |
+
|
| 668 |
+
def test_label_overflow(self):
|
| 669 |
+
# GH14256: buffer length not checked when writing label
|
| 670 |
+
result = DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json()
|
| 671 |
+
expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}'
|
| 672 |
+
assert result == expected
|
| 673 |
+
|
| 674 |
+
def test_series_non_unique_index(self):
    """A duplicated Series index fails for "index" but works for other orients."""
    ser = Series(["a", "b"], index=[1, 1])

    expected_error = "Series index must be unique for orient='index'"
    with pytest.raises(ValueError, match=expected_error):
        ser.to_json(orient="index")

    # "split" is expected to reproduce the whole Series.
    from_split = read_json(
        StringIO(ser.to_json(orient="split")), orient="split", typ="series"
    )
    tm.assert_series_equal(ser, from_split)

    # For "records" only the values are compared.
    from_records = read_json(
        StringIO(ser.to_json(orient="records")), orient="records", typ="series"
    )
    tm.assert_equal(ser.values, from_records.values)
|
| 691 |
+
|
| 692 |
+
def test_series_default_orient(self, string_series):
|
| 693 |
+
assert string_series.to_json() == string_series.to_json(orient="index")
|
| 694 |
+
|
| 695 |
+
def test_series_roundtrip_simple(self, orient, string_series, using_infer_string):
|
| 696 |
+
data = StringIO(string_series.to_json(orient=orient))
|
| 697 |
+
result = read_json(data, typ="series", orient=orient)
|
| 698 |
+
|
| 699 |
+
expected = string_series
|
| 700 |
+
if using_infer_string and orient in ("split", "index", "columns"):
|
| 701 |
+
# These schemas don't contain dtypes, so we infer string
|
| 702 |
+
expected.index = expected.index.astype("str")
|
| 703 |
+
if orient in ("values", "records"):
|
| 704 |
+
expected = expected.reset_index(drop=True)
|
| 705 |
+
if orient != "split":
|
| 706 |
+
expected.name = None
|
| 707 |
+
|
| 708 |
+
tm.assert_series_equal(result, expected)
|
| 709 |
+
|
| 710 |
+
@pytest.mark.parametrize("dtype", [False, None])
def test_series_roundtrip_object(self, orient, dtype, object_series):
    """Round-trip ``object_series`` through JSON for the given ``orient``."""
    data = StringIO(object_series.to_json(orient=orient))
    result = read_json(data, typ="series", orient=orient, dtype=dtype)

    # Work on a copy so that clearing ``name`` below does not modify the
    # object passed in as ``object_series``.
    expected = object_series.copy()
    if orient in ("values", "records"):
        expected = expected.reset_index(drop=True)
    if orient != "split":
        expected.name = None

    if using_string_dtype():
        expected = expected.astype("str")

    tm.assert_series_equal(result, expected)
|
| 725 |
+
|
| 726 |
+
def test_series_roundtrip_empty(self, orient):
|
| 727 |
+
empty_series = Series([], index=[], dtype=np.float64)
|
| 728 |
+
data = StringIO(empty_series.to_json(orient=orient))
|
| 729 |
+
result = read_json(data, typ="series", orient=orient)
|
| 730 |
+
|
| 731 |
+
expected = empty_series.reset_index(drop=True)
|
| 732 |
+
if orient in ("split"):
|
| 733 |
+
expected.index = expected.index.astype(np.float64)
|
| 734 |
+
|
| 735 |
+
tm.assert_series_equal(result, expected)
|
| 736 |
+
|
| 737 |
+
def test_series_roundtrip_timeseries(self, orient, datetime_series):
|
| 738 |
+
data = StringIO(datetime_series.to_json(orient=orient))
|
| 739 |
+
result = read_json(data, typ="series", orient=orient)
|
| 740 |
+
|
| 741 |
+
expected = datetime_series
|
| 742 |
+
if orient in ("values", "records"):
|
| 743 |
+
expected = expected.reset_index(drop=True)
|
| 744 |
+
if orient != "split":
|
| 745 |
+
expected.name = None
|
| 746 |
+
|
| 747 |
+
tm.assert_series_equal(result, expected)
|
| 748 |
+
|
| 749 |
+
@pytest.mark.parametrize("dtype", [np.float64, int])
def test_series_roundtrip_numeric(self, orient, dtype):
    """Round-trip a small integer Series with string labels."""
    # NOTE(review): ``dtype`` is parametrized but never used in the body, so
    # both parameter values currently run the same check.
    ser = Series(range(6), index=["a", "b", "c", "d", "e", "f"])
    payload = StringIO(ser.to_json(orient=orient))
    result = read_json(payload, typ="series", orient=orient)

    expected = ser.copy()
    if orient in ("values", "records"):
        expected = expected.reset_index(drop=True)

    tm.assert_series_equal(result, expected)
|
| 760 |
+
|
| 761 |
+
def test_series_to_json_except(self):
    """An unknown ``orient`` is rejected by ``Series.to_json``."""
    ser = Series([1, 2, 3])
    expected_error = "Invalid value 'garbage' for option 'orient'"

    with pytest.raises(ValueError, match=expected_error):
        ser.to_json(orient="garbage")
|
| 766 |
+
|
| 767 |
+
def test_series_from_json_precise_float(self):
|
| 768 |
+
s = Series([4.56, 4.56, 4.56])
|
| 769 |
+
result = read_json(StringIO(s.to_json()), typ="series", precise_float=True)
|
| 770 |
+
tm.assert_series_equal(result, s, check_index_type=False)
|
| 771 |
+
|
| 772 |
+
def test_series_with_dtype(self):
|
| 773 |
+
# GH 21986
|
| 774 |
+
s = Series([4.56, 4.56, 4.56])
|
| 775 |
+
result = read_json(StringIO(s.to_json()), typ="series", dtype=np.int64)
|
| 776 |
+
expected = Series([4] * 3)
|
| 777 |
+
tm.assert_series_equal(result, expected)
|
| 778 |
+
|
| 779 |
+
@pytest.mark.parametrize(
    "dtype,expected",
    [
        (True, Series(["2000-01-01"], dtype="datetime64[ns]")),
        (False, Series([946684800000])),
    ],
)
def test_series_with_dtype_datetime(self, dtype, expected):
    """Datetime data reads back as dates or as epoch integers depending on dtype."""
    ser = Series(["2000-01-01"], dtype="datetime64[ns]")
    payload = StringIO(ser.to_json())

    result = read_json(payload, typ="series", dtype=dtype)

    tm.assert_series_equal(result, expected)
|
| 791 |
+
|
| 792 |
+
def test_frame_from_json_precise_float(self):
|
| 793 |
+
df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
|
| 794 |
+
result = read_json(StringIO(df.to_json()), precise_float=True)
|
| 795 |
+
tm.assert_frame_equal(result, df)
|
| 796 |
+
|
| 797 |
+
def test_typ(self):
|
| 798 |
+
s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64")
|
| 799 |
+
result = read_json(StringIO(s.to_json()), typ=None)
|
| 800 |
+
tm.assert_series_equal(result, s)
|
| 801 |
+
|
| 802 |
+
def test_reconstruction_index(self):
|
| 803 |
+
df = DataFrame([[1, 2, 3], [4, 5, 6]])
|
| 804 |
+
result = read_json(StringIO(df.to_json()))
|
| 805 |
+
tm.assert_frame_equal(result, df)
|
| 806 |
+
|
| 807 |
+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
|
| 808 |
+
result = read_json(StringIO(df.to_json()))
|
| 809 |
+
tm.assert_frame_equal(result, df)
|
| 810 |
+
|
| 811 |
+
def test_path(self, float_frame, int_frame, datetime_frame):
    """Frames can be written to and read back from a file path."""
    frames = [float_frame, int_frame, datetime_frame]
    with tm.ensure_clean("test.json") as json_path:
        for frame in frames:
            # Smoke test only: the frame read back is not compared.
            frame.to_json(json_path)
            read_json(json_path)
|
| 816 |
+
|
| 817 |
+
def test_axis_dates(self, datetime_series, datetime_frame):
|
| 818 |
+
# frame
|
| 819 |
+
json = StringIO(datetime_frame.to_json())
|
| 820 |
+
result = read_json(json)
|
| 821 |
+
tm.assert_frame_equal(result, datetime_frame)
|
| 822 |
+
|
| 823 |
+
# series
|
| 824 |
+
json = StringIO(datetime_series.to_json())
|
| 825 |
+
result = read_json(json, typ="series")
|
| 826 |
+
tm.assert_series_equal(result, datetime_series, check_names=False)
|
| 827 |
+
assert result.name is None
|
| 828 |
+
|
| 829 |
+
def test_convert_dates(self, datetime_series, datetime_frame):
|
| 830 |
+
# frame
|
| 831 |
+
df = datetime_frame
|
| 832 |
+
df["date"] = Timestamp("20130101").as_unit("ns")
|
| 833 |
+
|
| 834 |
+
json = StringIO(df.to_json())
|
| 835 |
+
result = read_json(json)
|
| 836 |
+
tm.assert_frame_equal(result, df)
|
| 837 |
+
|
| 838 |
+
df["foo"] = 1.0
|
| 839 |
+
json = StringIO(df.to_json(date_unit="ns"))
|
| 840 |
+
|
| 841 |
+
result = read_json(json, convert_dates=False)
|
| 842 |
+
expected = df.copy()
|
| 843 |
+
expected["date"] = expected["date"].values.view("i8")
|
| 844 |
+
expected["foo"] = expected["foo"].astype("int64")
|
| 845 |
+
tm.assert_frame_equal(result, expected)
|
| 846 |
+
|
| 847 |
+
# series
|
| 848 |
+
ts = Series(Timestamp("20130101").as_unit("ns"), index=datetime_series.index)
|
| 849 |
+
json = StringIO(ts.to_json())
|
| 850 |
+
result = read_json(json, typ="series")
|
| 851 |
+
tm.assert_series_equal(result, ts)
|
| 852 |
+
|
| 853 |
+
@pytest.mark.parametrize("date_format", ["epoch", "iso"])
@pytest.mark.parametrize("as_object", [True, False])
@pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, Timestamp])
def test_date_index_and_values(self, date_format, as_object, date_typ):
    """Dates and NaT are encoded the same way as index labels and as values."""
    entries = [date_typ(year=2020, month=1, day=1), pd.NaT]
    if as_object:
        entries.append("a")

    ser = Series(entries, index=entries)
    result = ser.to_json(date_format=date_format)

    expected = (
        '{"1577836800000":1577836800000,"null":null}'
        if date_format == "epoch"
        else '{"2020-01-01T00:00:00.000":"2020-01-01T00:00:00.000","null":null}'
    )
    if as_object:
        # Add the extra string entry just before the closing brace.
        expected = expected.replace("}", ',"a":"a"}')

    assert result == expected
|
| 875 |
+
|
| 876 |
+
@pytest.mark.parametrize(
    "infer_word",
    [
        "trade_time",
        "date",
        "datetime",
        "sold_at",
        "modified",
        "timestamp",
        "timestamps",
    ],
)
def test_convert_dates_infer(self, infer_word):
    """Columns with date-like names are converted to datetimes when read."""
    # GH10747
    records = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
    wanted_columns = ["id", infer_word]
    expected = DataFrame(
        [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=wanted_columns
    )

    parsed = read_json(StringIO(ujson_dumps(records)))
    result = parsed[wanted_columns]

    tm.assert_frame_equal(result, expected)
|
| 898 |
+
|
| 899 |
+
@pytest.mark.parametrize(
    "date,date_unit",
    [
        ("20130101 20:43:42.123", None),
        ("20130101 20:43:42", "s"),
        ("20130101 20:43:42.123", "ms"),
        ("20130101 20:43:42.123456", "us"),
        ("20130101 20:43:42.123456789", "ns"),
    ],
)
def test_date_format_frame(self, date, date_unit, datetime_frame):
    """ISO-formatted dates in a frame round-trip for each ``date_unit``."""
    df = datetime_frame

    df["date"] = Timestamp(date).as_unit("ns")
    date_col = df.columns.get_loc("date")
    for row in (1, 5):
        df.iloc[row, date_col] = pd.NaT

    # Pass ``date_unit`` only when one was parametrized, so that the default
    # applies otherwise.
    unit_kwargs = {"date_unit": date_unit} if date_unit else {}
    json = df.to_json(date_format="iso", **unit_kwargs)

    result = read_json(StringIO(json))
    tm.assert_frame_equal(result, df.copy())
|
| 923 |
+
|
| 924 |
+
def test_date_format_frame_raises(self, datetime_frame):
    """An unknown ``date_unit`` is rejected by ``DataFrame.to_json``."""
    expected_error = "Invalid value 'foo' for option 'date_unit'"

    with pytest.raises(ValueError, match=expected_error):
        datetime_frame.to_json(date_format="iso", date_unit="foo")
|
| 929 |
+
|
| 930 |
+
@pytest.mark.parametrize(
    "date,date_unit",
    [
        ("20130101 20:43:42.123", None),
        ("20130101 20:43:42", "s"),
        ("20130101 20:43:42.123", "ms"),
        ("20130101 20:43:42.123456", "us"),
        ("20130101 20:43:42.123456789", "ns"),
    ],
)
def test_date_format_series(self, date, date_unit, datetime_series):
    """ISO-formatted dates in a Series round-trip for each ``date_unit``."""
    ts = Series(Timestamp(date).as_unit("ns"), index=datetime_series.index)
    for position in (1, 5):
        ts.iloc[position] = pd.NaT

    # Pass ``date_unit`` only when one was parametrized, so that the default
    # applies otherwise.
    unit_kwargs = {"date_unit": date_unit} if date_unit else {}
    json = ts.to_json(date_format="iso", **unit_kwargs)

    result = read_json(StringIO(json), typ="series")
    tm.assert_series_equal(result, ts.copy())
|
| 952 |
+
|
| 953 |
+
def test_date_format_series_raises(self, datetime_series):
    """An unknown ``date_unit`` is rejected by ``Series.to_json``."""
    stamp = Timestamp("20130101 20:43:42.123")
    ts = Series(stamp, index=datetime_series.index)
    expected_error = "Invalid value 'foo' for option 'date_unit'"

    with pytest.raises(ValueError, match=expected_error):
        ts.to_json(date_format="iso", date_unit="foo")
|
| 958 |
+
|
| 959 |
+
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_date_unit(self, unit, datetime_frame):
    """Epoch dates round-trip when the unit is given and when it is detected."""
    df = datetime_frame
    df["date"] = Timestamp("20130101 20:43:42").as_unit("ns")
    date_col = df.columns.get_loc("date")
    df.iloc[1, date_col] = Timestamp("19710101 20:43:42")
    df.iloc[2, date_col] = Timestamp("21460101 20:43:42")
    df.iloc[4, date_col] = pd.NaT

    json = df.to_json(date_format="epoch", date_unit=unit)

    # First force the date unit, then let the reader detect it (``None``).
    for read_unit in (unit, None):
        result = read_json(StringIO(json), date_unit=read_unit)
        tm.assert_frame_equal(result, df)
|
| 977 |
+
|
| 978 |
+
@pytest.mark.parametrize("unit", ["s", "ms", "us"])
def test_iso_non_nano_datetimes(self, unit):
    """Non-nanosecond numpy datetimes serialize correctly in ISO format."""
    # Test that numpy datetimes
    # in an Index or a column with non-nano resolution can be serialized
    # correctly
    # GH53686
    unit_dtype = f"datetime64[{unit}]"
    index = DatetimeIndex(
        [np.datetime64("2023-01-01T11:22:33.123456", unit)],
        dtype=unit_dtype,
    )
    date_column = Series(
        [np.datetime64("2022-01-01T11:22:33.123456", unit)],
        dtype=unit_dtype,
        index=index,
    )
    date_obj_column = Series(
        [np.datetime64("2023-01-01T11:22:33.123456", unit)],
        dtype=object,
        index=index,
    )
    df = DataFrame({"date": date_column, "date_obj": date_obj_column})

    buf = StringIO()
    df.to_json(buf, date_format="iso", date_unit=unit)
    buf.seek(0)
    result = read_json(buf, convert_dates=["date", "date_obj"])

    # read_json always reads datetimes in nanosecond resolution
    # TODO: check_dtype/check_index_type should be removable
    # once read_json gets non-nano support
    tm.assert_frame_equal(
        result,
        df,
        check_index_type=False,
        check_dtype=False,
    )
|
| 1016 |
+
|
| 1017 |
+
def test_weird_nested_json(self):
    """Parse a nested object/array document; this used to core dump the parser.

    Only successful completion matters, so the parsed result is discarded.
    """
    nested_doc = r"""{
"status": "success",
"data": {
"posts": [
{
"id": 1,
"title": "A blog post",
"body": "Some useful content"
},
{
"id": 2,
"title": "Another blog post",
"body": "More content"
}
]
}
}"""
    read_json(StringIO(nested_doc))
|
| 1037 |
+
|
| 1038 |
+
def test_doc_example(self):
    """Round-trip the documentation example frame while forcing column dtypes.

    The frame mixes floats, a datetime column, integers and booleans, and is
    read back with explicit dtypes for the ``ints`` and ``bools`` columns.
    """
    dfj2 = DataFrame(
        np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB")
    )
    dfj2["date"] = Timestamp("20130101")
    dfj2["ints"] = range(5)
    dfj2["bools"] = True
    dfj2.index = date_range("20130101", periods=5)

    json = StringIO(dfj2.to_json())
    result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})

    # Check the parsed frame against the source frame; comparing ``result``
    # with itself would pass regardless of what read_json returned.
    assert result.shape == dfj2.shape
    assert list(result.columns) == list(dfj2.columns)
    assert result["ints"].dtype == np.int64
    assert result["bools"].dtype == np.bool_
    assert result["ints"].tolist() == list(range(5))
    assert result["bools"].all()
|
| 1050 |
+
|
| 1051 |
+
def test_round_trip_exception(self, datapath):
    """Round-trip the teams.csv fixture through to_json/read_json (GH 3867)."""
    csv_path = datapath("io", "json", "data", "teams.csv")
    original = pd.read_csv(csv_path)

    roundtripped = read_json(StringIO(original.to_json()))
    # Restore the original row/column ordering before comparing
    aligned = roundtripped.reindex(index=original.index, columns=original.columns)

    deprecation = "The 'downcast' keyword in fillna is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=deprecation):
        aligned = aligned.fillna(np.nan, downcast=False)
    tm.assert_frame_equal(aligned, original)
|
| 1063 |
+
|
| 1064 |
+
@pytest.mark.network
@pytest.mark.single_cpu
@pytest.mark.parametrize(
    "field,dtype",
    [
        ["created_at", pd.DatetimeTZDtype(tz="UTC")],
        ["closed_at", "datetime64[ns]"],
        ["updated_at", pd.DatetimeTZDtype(tz="UTC")],
    ],
)
def test_url(self, field, dtype, httpserver):
    """Read JSON served over HTTP and check the dtype inferred for ``field``."""
    payload = (
        '{"created_at": ["2023-06-23T18:21:36Z"], '
        '"closed_at": ["2023-06-23T18:21:36"], '
        '"updated_at": ["2023-06-23T18:21:36Z"]}\n'
    )
    httpserver.serve_content(content=payload)
    frame = read_json(httpserver.url, convert_dates=True)
    assert frame[field].dtype == dtype
|
| 1079 |
+
|
| 1080 |
+
def test_timedelta(self):
    """Round-trip timedelta Series and DataFrame objects through JSON.

    The parsed values are converted back with ``pd.to_timedelta(..., unit="ms")``
    before being compared with the originals.
    """

    def as_timedelta(values):
        return pd.to_timedelta(values, unit="ms")

    deltas = [timedelta(23), timedelta(seconds=5)]

    # Default index first, then an explicitly constructed integer index
    for ser in (Series(deltas), Series(deltas, index=Index([0, 1]))):
        assert ser.dtype == "timedelta64[ns]"
        parsed = read_json(StringIO(ser.to_json()), typ="series")
        tm.assert_series_equal(parsed.apply(as_timedelta), ser)

    frame = DataFrame(deltas)
    assert frame[0].dtype == "timedelta64[ns]"
    parsed_frame = read_json(StringIO(frame.to_json()))
    tm.assert_frame_equal(frame, parsed_frame.apply(as_timedelta))
|
| 1099 |
+
|
| 1100 |
+
def test_timedelta2(self):
    """Round-trip a frame with timedelta, integer and datetime columns at ns unit."""
    columns = {
        "a": [timedelta(days=23), timedelta(seconds=5)],
        "b": [1, 2],
        "c": date_range(start="20130101", periods=2),
    }
    frame = DataFrame(columns)

    serialized = StringIO(frame.to_json(date_unit="ns"))
    parsed = read_json(serialized)
    # Convert the "a" and "c" columns back to their temporal types
    parsed["a"] = pd.to_timedelta(parsed.a, unit="ns")
    parsed["c"] = pd.to_datetime(parsed.c)

    tm.assert_frame_equal(frame, parsed)
|
| 1113 |
+
|
| 1114 |
+
def test_mixed_timedelta_datetime(self):
    """Serialize an object column mixing a timedelta and a Timestamp at ns unit.

    The column is read back as int64 and compared with the nanosecond values
    of the two inputs.
    """
    delta = timedelta(23)
    stamp = Timestamp("20130101")
    frame = DataFrame({"a": [delta, stamp]}, dtype=object)

    delta_ns = pd.Timedelta(delta).as_unit("ns")._value
    stamp_ns = stamp.as_unit("ns")._value
    expected = DataFrame({"a": [delta_ns, stamp_ns]})

    serialized = StringIO(frame.to_json(date_unit="ns"))
    parsed = read_json(serialized, dtype={"a": "int64"})
    tm.assert_frame_equal(parsed, expected, check_index_type=False)
|
| 1125 |
+
|
| 1126 |
+
@pytest.mark.parametrize("as_object", [True, False])
@pytest.mark.parametrize("date_format", ["iso", "epoch"])
@pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
def test_timedelta_to_json(self, as_object, date_format, timedelta_typ):
    """Check to_json output for timedeltas used as both values and labels.

    GH28156: to_json not correctly formatting Timedelta
    """
    values = [timedelta_typ(days=1), timedelta_typ(days=2), pd.NaT]
    if as_object:
        # A string element is added for the as_object parametrization
        values.append("a")
    ser = Series(values, index=values)

    expected_by_format = {
        "iso": (
            '{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}'
        ),
        "epoch": '{"86400000":86400000,"172800000":172800000,"null":null}',
    }
    expected = expected_by_format[date_format]
    if as_object:
        expected = expected.replace("}", ',"a":"a"}')

    assert ser.to_json(date_format=date_format) == expected
|
| 1148 |
+
|
| 1149 |
+
@pytest.mark.parametrize("as_object", [True, False])
@pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
def test_timedelta_to_json_fractional_precision(self, as_object, timedelta_typ):
    """A 42 ms timedelta serializes as 42 for both the label and the value."""
    values = [timedelta_typ(milliseconds=42)]
    ser = Series(values, index=values)
    if as_object:
        ser = ser.astype(object)

    assert ser.to_json() == '{"42":42}'
|
| 1160 |
+
|
| 1161 |
+
def test_default_handler(self):
    """Serialize an arbitrary object via ``default_handler=str`` and read it back."""
    opaque = object()
    frame = DataFrame({"a": [7, opaque]})
    expected = DataFrame({"a": [7, str(opaque)]})

    serialized = frame.to_json(default_handler=str)
    parsed = read_json(StringIO(serialized))

    tm.assert_frame_equal(expected, parsed, check_index_type=False)
|
| 1167 |
+
|
| 1168 |
+
def test_default_handler_indirect(self):
    """Apply a custom default handler to a DataFrame nested inside a list."""

    def encode_fallback(obj):
        # Complex numbers become a list of (key, value) pairs; all else is str()
        if not isinstance(obj, complex):
            return str(obj)
        return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]

    nested_frame = DataFrame(
        {"a": [1, "STR", complex(4, -5)], "b": [float("nan"), None, "N/A"]},
        columns=["a", "b"],
    )
    payload = [9, nested_frame]

    expected = (
        '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
        '["re",4.0],["im",-5.0]],"N\\/A"]]]'
    )
    encoded = ujson_dumps(payload, default_handler=encode_fallback, orient="values")
    assert encoded == expected
|
| 1188 |
+
|
| 1189 |
+
def test_default_handler_numpy_unsupported_dtype(self):
    """Serialize complex-valued columns with ``default_handler=str``.

    GH12554 to_json raises 'Unhandled numpy dtype 15'
    """
    column_a = [1, 2.3, complex(4, -5)]
    column_b = [float("nan"), None, complex(1.2, 0)]
    frame = DataFrame({"a": column_a, "b": column_b}, columns=["a", "b"])

    expected = (
        '[["(1+0j)","(nan+0j)"],'
        '["(2.3+0j)","(nan+0j)"],'
        '["(4-5j)","(1.2+0j)"]]'
    )
    encoded = frame.to_json(default_handler=str, orient="values")
    assert encoded == expected
|
| 1201 |
+
|
| 1202 |
+
def test_default_handler_raises(self):
    """A TypeError raised inside the default handler propagates out of to_json."""
    msg = "raisin"

    def my_handler_raises(obj):
        raise TypeError(msg)

    # First an arbitrary object, then a complex number
    for unsupported in (object(), complex(4, -5)):
        frame = DataFrame({"a": [1, 2, unsupported]})
        with pytest.raises(TypeError, match=msg):
            frame.to_json(default_handler=my_handler_raises)
|
| 1216 |
+
|
| 1217 |
+
def test_categorical(self):
    """Categorical data serializes the same as the equivalent plain data.

    GH4377 df.to_json segfaults with non-ndarray blocks
    """
    frame = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
    frame["B"] = frame["A"]
    plain_json = frame.to_json()

    frame["B"] = frame["A"].astype("category")
    assert plain_json == frame.to_json()

    plain_col = frame["A"]
    categorical_col = frame["B"]
    assert plain_col.to_json() == categorical_col.to_json()
|
| 1229 |
+
|
| 1230 |
+
def test_datetime_tz(self):
    """tz-aware datetimes serialize the same as their UTC-converted naive values.

    GH4377 df.to_json segfaults with non-ndarray blocks
    """
    aware = date_range("20130101", periods=3, tz="US/Eastern")
    naive_utc = aware.tz_convert("utc").tz_localize(None)

    aware_frame = DataFrame({"A": aware, "B": date_range("20130101", periods=3)})
    naive_frame = aware_frame.copy()
    naive_frame["A"] = naive_utc
    assert naive_frame.to_json() == aware_frame.to_json()

    aware_series = Series(aware)
    naive_series = Series(naive_utc)
    assert aware_series.to_json() == naive_series.to_json()
|
| 1245 |
+
|
| 1246 |
+
def test_sparse(self):
    """Sparse frames and series serialize identically to their dense versions.

    GH4377 df.to_json segfaults with non-ndarray blocks
    """
    dense_frame = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
    dense_frame.loc[:8] = np.nan
    sparse_frame = dense_frame.astype("Sparse")
    assert dense_frame.to_json() == sparse_frame.to_json()

    dense_series = Series(np.random.default_rng(2).standard_normal(10))
    dense_series.loc[:8] = np.nan
    sparse_series = dense_series.astype("Sparse")
    assert dense_series.to_json() == sparse_series.to_json()
|
| 1261 |
+
|
| 1262 |
+
@pytest.mark.parametrize(
    "ts",
    [
        Timestamp("2013-01-10 05:00:00Z"),
        Timestamp("2013-01-10 00:00:00", tz="US/Eastern"),
        Timestamp("2013-01-10 00:00:00-0500"),
    ],
)
def test_tz_is_utc(self, ts):
    """tz-aware scalars are written as ISO strings ending in ``Z``.

    Checked for the Timestamp and for its ``to_pydatetime()`` conversion.
    """
    expected = '"2013-01-10T05:00:00.000Z"'

    for scalar in (ts, ts.to_pydatetime()):
        assert ujson_dumps(scalar, iso_dates=True) == expected
|
| 1276 |
+
|
| 1277 |
+
def test_tz_is_naive(self):
    """Naive scalars are written as ISO strings without a ``Z`` suffix.

    Checked for the Timestamp and for its ``to_pydatetime()`` conversion.
    """
    naive = Timestamp("2013-01-10 05:00:00")
    expected = '"2013-01-10T05:00:00.000"'

    for scalar in (naive, naive.to_pydatetime()):
        assert ujson_dumps(scalar, iso_dates=True) == expected
|
| 1284 |
+
|
| 1285 |
+
@pytest.mark.parametrize(
    "tz_range",
    [
        date_range("2013-01-01 05:00:00Z", periods=2),
        date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"),
        date_range("2013-01-01 00:00:00-0500", periods=2),
    ],
)
def test_tz_range_is_utc(self, tz_range):
    """tz-aware ranges are written as ISO strings ending in ``Z``."""
    list_json = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
    frame_json = (
        '{"DT":{'
        '"0":"2013-01-01T05:00:00.000Z",'
        '"1":"2013-01-02T05:00:00.000Z"}}'
    )

    assert ujson_dumps(tz_range, iso_dates=True) == list_json

    # Ensure datetimes in object array are serialized correctly
    # in addition to the normal DTI case
    dti = DatetimeIndex(tz_range)
    for index_like in (dti, dti.astype(object)):
        assert ujson_dumps(index_like, iso_dates=True) == list_json

    frame = DataFrame({"DT": dti})
    assert ujson_dumps(frame, iso_dates=True) == frame_json
    # NOTE(review): this only checks that the output is non-empty; it is
    # presumably meant to be compared with the expected frame JSON - confirm.
    assert ujson_dumps(frame.astype({"DT": object}), iso_dates=True)
|
| 1311 |
+
|
| 1312 |
+
def test_tz_range_is_naive(self):
    """Naive ranges are written as ISO strings without a ``Z`` suffix."""
    dti = date_range("2013-01-01 05:00:00", periods=2)

    list_json = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
    frame_json = (
        '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'
    )

    # Ensure datetimes in object array are serialized correctly
    # in addition to the normal DTI case
    for index_like in (dti, dti.astype(object)):
        assert ujson_dumps(index_like, iso_dates=True) == list_json

    frame = DataFrame({"DT": dti})
    assert ujson_dumps(frame, iso_dates=True) == frame_json
    # NOTE(review): this only checks that the output is non-empty; it is
    # presumably meant to be compared with the expected frame JSON - confirm.
    assert ujson_dumps(frame.astype({"DT": object}), iso_dates=True)
|
| 1326 |
+
|
| 1327 |
+
def test_read_inline_jsonl(self):
    """Read two JSON lines whose keys appear in different orders (GH9180)."""
    lines = StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
    parsed = read_json(lines, lines=True)

    rows = [[1, 2], [1, 2]]
    tm.assert_frame_equal(parsed, DataFrame(rows, columns=["a", "b"]))
|
| 1333 |
+
|
| 1334 |
+
@pytest.mark.single_cpu
@td.skip_if_not_us_locale
def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so):
    """Read a JSON-lines object from the S3 test bucket (GH17200)."""
    bucket_name = s3_public_bucket_with_data.name
    parsed = read_json(
        f"s3n://{bucket_name}/items.jsonl",
        lines=True,
        storage_options=s3so,
    )

    rows = [[1, 2], [1, 2]]
    tm.assert_frame_equal(parsed, DataFrame(rows, columns=["a", "b"]))
|
| 1346 |
+
|
| 1347 |
+
def test_read_local_jsonl(self):
    """Read a JSON-lines file from a local temporary path (GH17200)."""
    content = '{"a": 1, "b": 2}\n{"b":2, "a" :1}\n'
    with tm.ensure_clean("tmp_items.json") as path:
        with open(path, "w", encoding="utf-8") as handle:
            handle.write(content)
        parsed = read_json(path, lines=True)

        rows = [[1, 2], [1, 2]]
        tm.assert_frame_equal(parsed, DataFrame(rows, columns=["a", "b"]))
|
| 1355 |
+
|
| 1356 |
+
def test_read_jsonl_unicode_chars(self):
    """Read JSON lines containing a non-ascii character (GH15132).

    \\u201d == RIGHT DOUBLE QUOTATION MARK
    """
    rows = [["foo\u201d", "bar"], ["foo", "bar"]]

    # simulate file handle
    text = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    handle = StringIO(text)
    parsed = read_json(handle, lines=True)
    tm.assert_frame_equal(parsed, DataFrame(rows, columns=["a", "b"]))

    # simulate string
    inline = StringIO('{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n')
    parsed = read_json(inline, lines=True)
    tm.assert_frame_equal(parsed, DataFrame(rows, columns=["a", "b"]))
|
| 1372 |
+
|
| 1373 |
+
@pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)])
def test_to_json_large_numbers(self, bigNum):
    """Object-dtype integers outside the ``sys.maxsize`` range are written in full.

    GH34473
    """
    digits = str(bigNum)

    ser = Series(bigNum, dtype=object, index=["articleId"])
    assert ser.to_json() == '{"articleId":' + digits + "}"

    frame = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
    assert frame.to_json() == '{"0":{"articleId":' + digits + "}}"
|
| 1385 |
+
|
| 1386 |
+
@pytest.mark.parametrize("bigNum", [-(2**63) - 1, 2**64])
def test_read_json_large_numbers(self, bigNum):
    """Out-of-range integer literals make read_json raise ValueError.

    GH20599, 26068
    """
    msg = r"Value is too small|Value is too big"
    flat_doc = '{"articleId":' + str(bigNum) + "}"
    nested_doc = '{"0":{"articleId":' + str(bigNum) + "}}"

    for document in (flat_doc, nested_doc):
        with pytest.raises(ValueError, match=msg):
            read_json(StringIO(document))
|
| 1397 |
+
|
| 1398 |
+
def test_read_json_large_numbers2(self):
    """A very long digit string is read back as a float (GH18842)."""
    flat_doc = StringIO('{"articleId": "1404366058080022500245"}')
    parsed_series = read_json(flat_doc, typ="series")
    tm.assert_series_equal(parsed_series, Series(1.404366e21, index=["articleId"]))

    nested_doc = StringIO('{"0": {"articleId": "1404366058080022500245"}}')
    parsed_frame = read_json(nested_doc)
    expected_frame = DataFrame(1.404366e21, index=["articleId"], columns=[0])
    tm.assert_frame_equal(parsed_frame, expected_frame)
|
| 1411 |
+
|
| 1412 |
+
def test_to_jsonl(self):
    """Write frames as JSON lines, including values that need escaping (GH9180)."""
    plain = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
    written = plain.to_json(orient="records", lines=True)
    assert written == '{"a":1,"b":2}\n{"a":1,"b":2}\n'

    # Values containing a closing brace and a double quote
    quoted = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
    written = quoted.to_json(orient="records", lines=True)
    assert written == '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
    tm.assert_frame_equal(read_json(StringIO(written), lines=True), quoted)

    # GH15096: escaped characters in columns and data
    escaped = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
    written = escaped.to_json(orient="records", lines=True)
    expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
    assert written == expected

    tm.assert_frame_equal(read_json(StringIO(written), lines=True), escaped)
|
| 1432 |
+
|
| 1433 |
+
# TODO: there is a near-identical test for pytables; can we share?
|
| 1434 |
+
@pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError)
@pytest.mark.parametrize(
    "val",
    [
        [b"E\xc9, 17", b"", b"a", b"b", b"c"],
        [b"E\xc9, 17", b"a", b"b", b"c"],
        [b"EE, 17", b"", b"a", b"b", b"c"],
        [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
        [b"", b"a", b"b", b"c"],
        [b"\xf8\xfc", b"a", b"b", b"c"],
        [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
        [np.nan, b"", b"b", b"c"],
        [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
    ],
)
@pytest.mark.parametrize("dtype", ["category", object])
def test_latin_encoding(self, dtype, val):
    """Write and read latin-1 text with an explicit encoding (GH 13774).

    Marked xfail with ``raises=TypeError`` because the encoding kwarg is not
    supported.
    """
    encoding = "latin-1"

    decoded = []
    for item in val:
        # Only bytes are decoded; other entries (np.nan) pass through unchanged
        decoded.append(item.decode("latin-1") if isinstance(item, bytes) else item)
    ser = Series(decoded, dtype=dtype)

    with tm.ensure_clean("test.json") as path:
        ser.to_json(path, encoding=encoding)
        # NOTE(review): StringIO(path) wraps the path string, not the file
        # contents - confirm the intended input if the xfail is ever removed.
        retr = read_json(StringIO(path), encoding=encoding)
        tm.assert_series_equal(ser, retr, check_categorical=False)
|
| 1461 |
+
|
| 1462 |
+
def test_data_frame_size_after_to_json(self):
    """Calling to_json leaves the frame's deep memory usage unchanged (GH15344)."""
    frame = DataFrame({"a": [str(1)]})

    def deep_size():
        return frame.memory_usage(index=True, deep=True).sum()

    before = deep_size()
    frame.to_json()
    after = deep_size()

    assert before == after
|
| 1471 |
+
|
| 1472 |
+
@pytest.mark.parametrize(
    "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]]
)
@pytest.mark.parametrize("columns", [["a", "b"], ["1", "2"], ["1.", "2."]])
def test_from_json_to_json_table_index_and_columns(self, index, columns):
    """Index and column labels survive an ``orient="table"`` round trip.

    GH25433 GH25435
    """
    frame = DataFrame([[1, 2], [3, 4]], index=index, columns=columns)
    table_json = frame.to_json(orient="table")

    parsed = read_json(StringIO(table_json), orient="table")
    tm.assert_frame_equal(parsed, frame)
|
| 1483 |
+
|
| 1484 |
+
def test_from_json_to_json_table_dtypes(self):
    """Integer, float and string columns survive an ``orient="table"`` round trip.

    GH21345
    """
    frame = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
    table_json = frame.to_json(orient="table")

    parsed = read_json(StringIO(table_json), orient="table")
    tm.assert_frame_equal(parsed, frame)
|
| 1490 |
+
|
| 1491 |
+
# TODO: We are casting to string which coerces None to NaN before casting back
|
| 1492 |
+
# to object, ending up with incorrect na values
|
| 1493 |
+
@pytest.mark.xfail(using_string_dtype(), reason="incorrect na conversion")
@pytest.mark.parametrize("orient", ["split", "records", "index", "columns"])
def test_to_json_from_json_columns_dtypes(self, orient):
    """Round-trip mixed-dtype columns, restoring dtypes via ``dtype=`` on read.

    GH21892 GH33205
    """
    # Column name -> (raw values, dtype); the dtype is reused when reading back
    spec = {
        "Integer": ([1, 2, 3], "int64"),
        "Float": ([None, 2.0, 3.0], "float64"),
        "Object": ([None, "", "c"], "object"),
        "Bool": ([True, False, True], "bool"),
        "Category": (["a", "b", None], "category"),
        "Datetime": (["2020-01-01", None, "2020-01-03"], "datetime64[ns]"),
    }
    expected = DataFrame.from_dict(
        {name: Series(values, dtype=dtype) for name, (values, dtype) in spec.items()}
    )
    dfjson = expected.to_json(orient=orient)

    result = read_json(
        StringIO(dfjson),
        orient=orient,
        dtype={name: dtype for name, (_, dtype) in spec.items()},
    )
    tm.assert_frame_equal(result, expected)
|
| 1524 |
+
|
| 1525 |
+
@pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
def test_read_json_table_dtype_raises(self, dtype):
    """Passing ``dtype`` together with ``orient="table"`` raises ValueError.

    GH21345
    """
    df = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
    dfjson = df.to_json(orient="table")
    msg = "cannot pass both dtype and orient='table'"
    with pytest.raises(ValueError, match=msg):
        # Wrap the payload in StringIO like the other read_json calls in this
        # file, instead of handing read_json a literal JSON string.
        read_json(StringIO(dfjson), orient="table", dtype=dtype)
|
| 1533 |
+
|
| 1534 |
+
@pytest.mark.parametrize("orient", ["index", "columns", "records", "values"])
def test_read_json_table_empty_axes_dtype(self, orient):
    """Reading ``{}`` with convert_axes gives axes matching an empty DataFrame.

    GH28558
    """
    empty = DataFrame()
    parsed = read_json(StringIO("{}"), orient=orient, convert_axes=True)

    for axis_name in ("index", "columns"):
        tm.assert_index_equal(getattr(parsed, axis_name), getattr(empty, axis_name))
|
| 1542 |
+
|
| 1543 |
+
def test_read_json_table_convert_axes_raises(self):
    """Passing ``convert_axes`` together with ``orient="table"`` raises ValueError.

    GH25433 GH25435
    """
    df = DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=["1.", "2."])
    dfjson = df.to_json(orient="table")
    msg = "cannot pass both convert_axes and orient='table'"
    with pytest.raises(ValueError, match=msg):
        # Wrap the payload in StringIO like the other read_json calls in this
        # file, instead of handing read_json a literal JSON string.
        read_json(StringIO(dfjson), orient="table", convert_axes=True)
|
| 1550 |
+
|
| 1551 |
+
@pytest.mark.parametrize(
    "data, expected",
    [
        (
            DataFrame([[1, 2], [4, 5]], columns=["a", "b"]),
            {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
        ),
        (
            DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo"),
            {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
        ),
        (
            DataFrame(
                [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
            ),
            {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
        ),
        (Series([1, 2, 3], name="A"), {"name": "A", "data": [1, 2, 3]}),
        (
            Series([1, 2, 3], name="A").rename_axis("foo"),
            {"name": "A", "data": [1, 2, 3]},
        ),
        (
            Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]]),
            {"name": "A", "data": [1, 2]},
        ),
    ],
)
def test_index_false_to_json_split(self, data, expected):
    """``index=False`` with ``orient='split'`` leaves the index out of the output.

    GH 17394
    """
    encoded = data.to_json(orient="split", index=False)
    decoded = json.loads(encoded)

    assert decoded == expected
|
| 1587 |
+
|
| 1588 |
+
@pytest.mark.parametrize(
    "data",
    [
        (DataFrame([[1, 2], [4, 5]], columns=["a", "b"])),
        (DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo")),
        (
            DataFrame(
                [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
            )
        ),
        (Series([1, 2, 3], name="A")),
        (Series([1, 2, 3], name="A").rename_axis("foo")),
        (Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]])),
    ],
)
def test_index_false_to_json_table(self, data):
    """``index=False`` with ``orient='table'`` gives an index-free schema and records.

    GH 17394
    """
    decoded = json.loads(data.to_json(orient="table", index=False))

    schema = pd.io.json.build_table_schema(data, index=False)
    records = DataFrame(data).to_dict(orient="records")

    assert decoded == {"schema": schema, "data": records}
|
| 1616 |
+
|
| 1617 |
+
@pytest.mark.parametrize("orient", ["index", "columns"])
def test_index_false_error_to_json(self, orient):
    """``index=False`` raises ValueError for the 'index' and 'columns' orients.

    GH 17394, 25513
    """
    frame = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])

    msg = "'index=False' is only valid when 'orient' is 'split', 'table', 'records', or 'values'"  # noqa: E501
    with pytest.raises(ValueError, match=msg):
        frame.to_json(orient=orient, index=False)
|
| 1630 |
+
|
| 1631 |
+
@pytest.mark.parametrize("orient", ["records", "values"])
def test_index_true_error_to_json(self, orient):
    """``index=True`` raises ValueError for the 'records' and 'values' orients.

    GH 25513
    """
    frame = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])

    msg = "'index=True' is only valid when 'orient' is 'split', 'table', 'index', or 'columns'"  # noqa: E501
    with pytest.raises(ValueError, match=msg):
        frame.to_json(orient=orient, index=True)
|
| 1644 |
+
|
| 1645 |
+
@pytest.mark.parametrize("orient", ["split", "table"])
@pytest.mark.parametrize("index", [True, False])
def test_index_false_from_json_to_json(self, orient, index):
    """A default-index frame round-trips whether or not the index is written.

    GH25170
    """
    frame = DataFrame({"a": [1, 2], "b": [3, 4]})
    encoded = frame.to_json(orient=orient, index=index)

    parsed = read_json(StringIO(encoded), orient=orient)
    tm.assert_frame_equal(parsed, frame)
|
| 1654 |
+
|
| 1655 |
+
def test_read_timezone_information(self):
    """A ``Z``-suffixed ISO label is read back as a UTC-aware index (GH 25546)."""
    document = StringIO('{"2019-01-01T11:00:00.000Z":88}')
    parsed = read_json(document, typ="series", orient="index")

    utc_index = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[ns, UTC]")
    tm.assert_series_equal(parsed, Series([88], index=utc_index))
|
| 1663 |
+
|
| 1664 |
+
@pytest.mark.parametrize(
    "url",
    [
        "s3://example-fsspec/",
        "gcs://another-fsspec/file.json",
        "https://example-site.com/data",
        "some-protocol://data.txt",
    ],
)
def test_read_json_with_url_value(self, url):
    """URL-looking strings stored as values are read back unchanged (GH 36271)."""
    document = '{"url":{"0":"' + url + '"}}'
    parsed = read_json(StringIO(document))

    tm.assert_frame_equal(parsed, DataFrame({"url": [url]}))
|
| 1678 |
+
|
| 1679 |
+
@pytest.mark.parametrize(
    "compression",
    ["", ".gz", ".bz2", ".tar"],
)
def test_read_json_with_very_long_file_path(self, compression):
    """A 1000-character missing path raises FileNotFoundError (GH 46718)."""
    long_json_path = "a" * 1000 + ".json" + compression
    expected_message = f"File {long_json_path} does not exist"

    # path too long for Windows is handled in file_exists() but raises in
    # _get_data_from_filepath()
    with pytest.raises(FileNotFoundError, match=expected_message):
        read_json(long_json_path)
|
| 1692 |
+
|
| 1693 |
+
@pytest.mark.parametrize(
    "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")]
)
def test_timedelta_as_label(self, date_format, key):
    """A Timedelta column label is written according to ``date_format``."""
    frame = DataFrame([[1]], columns=[pd.Timedelta("1D")])
    expected = '{"' + str(key) + '":{"0":1}}'

    assert frame.to_json(date_format=date_format) == expected
|
| 1702 |
+
|
| 1703 |
+
@pytest.mark.parametrize(
    "orient,expected",
    [
        ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"),
        ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"),
        # TODO: the below have separate encoding procedures
        pytest.param(
            "split",
            "",
            marks=pytest.mark.xfail(
                reason="Produces JSON but not in a consistent manner"
            ),
        ),
        pytest.param(
            "table",
            "",
            marks=pytest.mark.xfail(
                reason="Produces JSON but not in a consistent manner"
            ),
        ),
    ],
)
def test_tuple_labels(self, orient, expected):
    """Tuple row/column labels are written using their string form (GH 20500)."""
    row_label = ("a", "b")
    column_label = ("c", "d")
    frame = DataFrame([[1]], index=[row_label], columns=[column_label])

    assert frame.to_json(orient=orient) == expected
|
| 1730 |
+
|
| 1731 |
+
@pytest.mark.parametrize("indent", [1, 2, 4])
def test_to_json_indent(self, indent):
    """``indent`` controls the number of spaces per nesting level (GH 12004)."""
    frame = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
    encoded = frame.to_json(indent=indent)

    pad = " " * indent
    # One entry per output line; nested entries carry two pads
    expected_lines = [
        "{",
        f'{pad}"a":{{',
        f'{pad}{pad}"0":"foo",',
        f'{pad}{pad}"1":"baz"',
        f"{pad}}},",
        f'{pad}"b":{{',
        f'{pad}{pad}"0":"bar",',
        f'{pad}{pad}"1":"qux"',
        f"{pad}}}",
        "}",
    ]

    assert encoded == "\n".join(expected_lines)
|
| 1750 |
+
|
| 1751 |
+
@pytest.mark.skipif(
|
| 1752 |
+
using_string_dtype(),
|
| 1753 |
+
reason="Adjust expected when infer_string is default, no bug here, "
|
| 1754 |
+
"just a complicated parametrization",
|
| 1755 |
+
)
|
| 1756 |
+
@pytest.mark.parametrize(
|
| 1757 |
+
"orient,expected",
|
| 1758 |
+
[
|
| 1759 |
+
(
|
| 1760 |
+
"split",
|
| 1761 |
+
"""{
|
| 1762 |
+
"columns":[
|
| 1763 |
+
"a",
|
| 1764 |
+
"b"
|
| 1765 |
+
],
|
| 1766 |
+
"index":[
|
| 1767 |
+
0,
|
| 1768 |
+
1
|
| 1769 |
+
],
|
| 1770 |
+
"data":[
|
| 1771 |
+
[
|
| 1772 |
+
"foo",
|
| 1773 |
+
"bar"
|
| 1774 |
+
],
|
| 1775 |
+
[
|
| 1776 |
+
"baz",
|
| 1777 |
+
"qux"
|
| 1778 |
+
]
|
| 1779 |
+
]
|
| 1780 |
+
}""",
|
| 1781 |
+
),
|
| 1782 |
+
(
|
| 1783 |
+
"records",
|
| 1784 |
+
"""[
|
| 1785 |
+
{
|
| 1786 |
+
"a":"foo",
|
| 1787 |
+
"b":"bar"
|
| 1788 |
+
},
|
| 1789 |
+
{
|
| 1790 |
+
"a":"baz",
|
| 1791 |
+
"b":"qux"
|
| 1792 |
+
}
|
| 1793 |
+
]""",
|
| 1794 |
+
),
|
| 1795 |
+
(
|
| 1796 |
+
"index",
|
| 1797 |
+
"""{
|
| 1798 |
+
"0":{
|
| 1799 |
+
"a":"foo",
|
| 1800 |
+
"b":"bar"
|
| 1801 |
+
},
|
| 1802 |
+
"1":{
|
| 1803 |
+
"a":"baz",
|
| 1804 |
+
"b":"qux"
|
| 1805 |
+
}
|
| 1806 |
+
}""",
|
| 1807 |
+
),
|
| 1808 |
+
(
|
| 1809 |
+
"columns",
|
| 1810 |
+
"""{
|
| 1811 |
+
"a":{
|
| 1812 |
+
"0":"foo",
|
| 1813 |
+
"1":"baz"
|
| 1814 |
+
},
|
| 1815 |
+
"b":{
|
| 1816 |
+
"0":"bar",
|
| 1817 |
+
"1":"qux"
|
| 1818 |
+
}
|
| 1819 |
+
}""",
|
| 1820 |
+
),
|
| 1821 |
+
(
|
| 1822 |
+
"values",
|
| 1823 |
+
"""[
|
| 1824 |
+
[
|
| 1825 |
+
"foo",
|
| 1826 |
+
"bar"
|
| 1827 |
+
],
|
| 1828 |
+
[
|
| 1829 |
+
"baz",
|
| 1830 |
+
"qux"
|
| 1831 |
+
]
|
| 1832 |
+
]""",
|
| 1833 |
+
),
|
| 1834 |
+
(
|
| 1835 |
+
"table",
|
| 1836 |
+
"""{
|
| 1837 |
+
"schema":{
|
| 1838 |
+
"fields":[
|
| 1839 |
+
{
|
| 1840 |
+
"name":"index",
|
| 1841 |
+
"type":"integer"
|
| 1842 |
+
},
|
| 1843 |
+
{
|
| 1844 |
+
"name":"a",
|
| 1845 |
+
"type":"string"
|
| 1846 |
+
},
|
| 1847 |
+
{
|
| 1848 |
+
"name":"b",
|
| 1849 |
+
"type":"string"
|
| 1850 |
+
}
|
| 1851 |
+
],
|
| 1852 |
+
"primaryKey":[
|
| 1853 |
+
"index"
|
| 1854 |
+
],
|
| 1855 |
+
"pandas_version":"1.4.0"
|
| 1856 |
+
},
|
| 1857 |
+
"data":[
|
| 1858 |
+
{
|
| 1859 |
+
"index":0,
|
| 1860 |
+
"a":"foo",
|
| 1861 |
+
"b":"bar"
|
| 1862 |
+
},
|
| 1863 |
+
{
|
| 1864 |
+
"index":1,
|
| 1865 |
+
"a":"baz",
|
| 1866 |
+
"b":"qux"
|
| 1867 |
+
}
|
| 1868 |
+
]
|
| 1869 |
+
}""",
|
| 1870 |
+
),
|
| 1871 |
+
],
|
| 1872 |
+
)
|
| 1873 |
+
def test_json_indent_all_orients(self, orient, expected):
|
| 1874 |
+
# GH 12004
|
| 1875 |
+
df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
|
| 1876 |
+
result = df.to_json(orient=orient, indent=4)
|
| 1877 |
+
assert result == expected
|
| 1878 |
+
|
| 1879 |
+
def test_json_negative_indent_raises(self):
|
| 1880 |
+
with pytest.raises(ValueError, match="must be a nonnegative integer"):
|
| 1881 |
+
DataFrame().to_json(indent=-1)
|
| 1882 |
+
|
| 1883 |
+
def test_emca_262_nan_inf_support(self):
|
| 1884 |
+
# GH 12213
|
| 1885 |
+
data = StringIO(
|
| 1886 |
+
'["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
|
| 1887 |
+
)
|
| 1888 |
+
result = read_json(data)
|
| 1889 |
+
expected = DataFrame(
|
| 1890 |
+
["a", None, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
|
| 1891 |
+
)
|
| 1892 |
+
tm.assert_frame_equal(result, expected)
|
| 1893 |
+
|
| 1894 |
+
def test_frame_int_overflow(self):
|
| 1895 |
+
# GH 30320
|
| 1896 |
+
encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}])
|
| 1897 |
+
expected = DataFrame({"col": ["31900441201190696999", "Text"]})
|
| 1898 |
+
result = read_json(StringIO(encoded_json))
|
| 1899 |
+
tm.assert_frame_equal(result, expected)
|
| 1900 |
+
|
| 1901 |
+
@pytest.mark.parametrize(
|
| 1902 |
+
"dataframe,expected",
|
| 1903 |
+
[
|
| 1904 |
+
(
|
| 1905 |
+
DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}),
|
| 1906 |
+
'{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,'
|
| 1907 |
+
'"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}',
|
| 1908 |
+
)
|
| 1909 |
+
],
|
| 1910 |
+
)
|
| 1911 |
+
def test_json_multiindex(self, dataframe, expected):
|
| 1912 |
+
series = dataframe.stack(future_stack=True)
|
| 1913 |
+
result = series.to_json(orient="index")
|
| 1914 |
+
assert result == expected
|
| 1915 |
+
|
| 1916 |
+
@pytest.mark.single_cpu
|
| 1917 |
+
def test_to_s3(self, s3_public_bucket, s3so):
|
| 1918 |
+
# GH 28375
|
| 1919 |
+
mock_bucket_name, target_file = s3_public_bucket.name, "test.json"
|
| 1920 |
+
df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
|
| 1921 |
+
df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
|
| 1922 |
+
timeout = 5
|
| 1923 |
+
while True:
|
| 1924 |
+
if target_file in (obj.key for obj in s3_public_bucket.objects.all()):
|
| 1925 |
+
break
|
| 1926 |
+
time.sleep(0.1)
|
| 1927 |
+
timeout -= 0.1
|
| 1928 |
+
assert timeout > 0, "Timed out waiting for file to appear on moto"
|
| 1929 |
+
|
| 1930 |
+
def test_json_pandas_nulls(self, nulls_fixture, request):
|
| 1931 |
+
# GH 31615
|
| 1932 |
+
if isinstance(nulls_fixture, Decimal):
|
| 1933 |
+
mark = pytest.mark.xfail(reason="not implemented")
|
| 1934 |
+
request.applymarker(mark)
|
| 1935 |
+
|
| 1936 |
+
result = DataFrame([[nulls_fixture]]).to_json()
|
| 1937 |
+
assert result == '{"0":{"0":null}}'
|
| 1938 |
+
|
| 1939 |
+
def test_readjson_bool_series(self):
|
| 1940 |
+
# GH31464
|
| 1941 |
+
result = read_json(StringIO("[true, true, false]"), typ="series")
|
| 1942 |
+
expected = Series([True, True, False])
|
| 1943 |
+
tm.assert_series_equal(result, expected)
|
| 1944 |
+
|
| 1945 |
+
def test_to_json_multiindex_escape(self):
|
| 1946 |
+
# GH 15273
|
| 1947 |
+
df = DataFrame(
|
| 1948 |
+
True,
|
| 1949 |
+
index=date_range("2017-01-20", "2017-01-23"),
|
| 1950 |
+
columns=["foo", "bar"],
|
| 1951 |
+
).stack(future_stack=True)
|
| 1952 |
+
result = df.to_json()
|
| 1953 |
+
expected = (
|
| 1954 |
+
"{\"(Timestamp('2017-01-20 00:00:00'), 'foo')\":true,"
|
| 1955 |
+
"\"(Timestamp('2017-01-20 00:00:00'), 'bar')\":true,"
|
| 1956 |
+
"\"(Timestamp('2017-01-21 00:00:00'), 'foo')\":true,"
|
| 1957 |
+
"\"(Timestamp('2017-01-21 00:00:00'), 'bar')\":true,"
|
| 1958 |
+
"\"(Timestamp('2017-01-22 00:00:00'), 'foo')\":true,"
|
| 1959 |
+
"\"(Timestamp('2017-01-22 00:00:00'), 'bar')\":true,"
|
| 1960 |
+
"\"(Timestamp('2017-01-23 00:00:00'), 'foo')\":true,"
|
| 1961 |
+
"\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}"
|
| 1962 |
+
)
|
| 1963 |
+
assert result == expected
|
| 1964 |
+
|
| 1965 |
+
def test_to_json_series_of_objects(self):
|
| 1966 |
+
class _TestObject:
|
| 1967 |
+
def __init__(self, a, b, _c, d) -> None:
|
| 1968 |
+
self.a = a
|
| 1969 |
+
self.b = b
|
| 1970 |
+
self._c = _c
|
| 1971 |
+
self.d = d
|
| 1972 |
+
|
| 1973 |
+
def e(self):
|
| 1974 |
+
return 5
|
| 1975 |
+
|
| 1976 |
+
# JSON keys should be all non-callable non-underscore attributes, see GH-42768
|
| 1977 |
+
series = Series([_TestObject(a=1, b=2, _c=3, d=4)])
|
| 1978 |
+
assert json.loads(series.to_json()) == {"0": {"a": 1, "b": 2, "d": 4}}
|
| 1979 |
+
|
| 1980 |
+
@pytest.mark.parametrize(
|
| 1981 |
+
"data,expected",
|
| 1982 |
+
[
|
| 1983 |
+
(
|
| 1984 |
+
Series({0: -6 + 8j, 1: 0 + 1j, 2: 9 - 5j}),
|
| 1985 |
+
'{"0":{"imag":8.0,"real":-6.0},'
|
| 1986 |
+
'"1":{"imag":1.0,"real":0.0},'
|
| 1987 |
+
'"2":{"imag":-5.0,"real":9.0}}',
|
| 1988 |
+
),
|
| 1989 |
+
(
|
| 1990 |
+
Series({0: -9.39 + 0.66j, 1: 3.95 + 9.32j, 2: 4.03 - 0.17j}),
|
| 1991 |
+
'{"0":{"imag":0.66,"real":-9.39},'
|
| 1992 |
+
'"1":{"imag":9.32,"real":3.95},'
|
| 1993 |
+
'"2":{"imag":-0.17,"real":4.03}}',
|
| 1994 |
+
),
|
| 1995 |
+
(
|
| 1996 |
+
DataFrame([[-2 + 3j, -1 - 0j], [4 - 3j, -0 - 10j]]),
|
| 1997 |
+
'{"0":{"0":{"imag":3.0,"real":-2.0},'
|
| 1998 |
+
'"1":{"imag":-3.0,"real":4.0}},'
|
| 1999 |
+
'"1":{"0":{"imag":0.0,"real":-1.0},'
|
| 2000 |
+
'"1":{"imag":-10.0,"real":0.0}}}',
|
| 2001 |
+
),
|
| 2002 |
+
(
|
| 2003 |
+
DataFrame(
|
| 2004 |
+
[[-0.28 + 0.34j, -1.08 - 0.39j], [0.41 - 0.34j, -0.78 - 1.35j]]
|
| 2005 |
+
),
|
| 2006 |
+
'{"0":{"0":{"imag":0.34,"real":-0.28},'
|
| 2007 |
+
'"1":{"imag":-0.34,"real":0.41}},'
|
| 2008 |
+
'"1":{"0":{"imag":-0.39,"real":-1.08},'
|
| 2009 |
+
'"1":{"imag":-1.35,"real":-0.78}}}',
|
| 2010 |
+
),
|
| 2011 |
+
],
|
| 2012 |
+
)
|
| 2013 |
+
def test_complex_data_tojson(self, data, expected):
|
| 2014 |
+
# GH41174
|
| 2015 |
+
result = data.to_json()
|
| 2016 |
+
assert result == expected
|
| 2017 |
+
|
| 2018 |
+
def test_json_uint64(self):
|
| 2019 |
+
# GH21073
|
| 2020 |
+
expected = (
|
| 2021 |
+
'{"columns":["col1"],"index":[0,1],'
|
| 2022 |
+
'"data":[[13342205958987758245],[12388075603347835679]]}'
|
| 2023 |
+
)
|
| 2024 |
+
df = DataFrame(data={"col1": [13342205958987758245, 12388075603347835679]})
|
| 2025 |
+
result = df.to_json(orient="split")
|
| 2026 |
+
assert result == expected
|
| 2027 |
+
|
| 2028 |
+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
|
| 2029 |
+
def test_read_json_dtype_backend(
|
| 2030 |
+
self, string_storage, dtype_backend, orient, using_infer_string
|
| 2031 |
+
):
|
| 2032 |
+
# GH#50750
|
| 2033 |
+
df = DataFrame(
|
| 2034 |
+
{
|
| 2035 |
+
"a": Series([1, np.nan, 3], dtype="Int64"),
|
| 2036 |
+
"b": Series([1, 2, 3], dtype="Int64"),
|
| 2037 |
+
"c": Series([1.5, np.nan, 2.5], dtype="Float64"),
|
| 2038 |
+
"d": Series([1.5, 2.0, 2.5], dtype="Float64"),
|
| 2039 |
+
"e": [True, False, None],
|
| 2040 |
+
"f": [True, False, True],
|
| 2041 |
+
"g": ["a", "b", "c"],
|
| 2042 |
+
"h": ["a", "b", None],
|
| 2043 |
+
}
|
| 2044 |
+
)
|
| 2045 |
+
|
| 2046 |
+
out = df.to_json(orient=orient)
|
| 2047 |
+
with pd.option_context("mode.string_storage", string_storage):
|
| 2048 |
+
result = read_json(
|
| 2049 |
+
StringIO(out), dtype_backend=dtype_backend, orient=orient
|
| 2050 |
+
)
|
| 2051 |
+
|
| 2052 |
+
if dtype_backend == "pyarrow":
|
| 2053 |
+
pa = pytest.importorskip("pyarrow")
|
| 2054 |
+
string_dtype = pd.ArrowDtype(pa.string())
|
| 2055 |
+
else:
|
| 2056 |
+
string_dtype = pd.StringDtype(string_storage)
|
| 2057 |
+
|
| 2058 |
+
expected = DataFrame(
|
| 2059 |
+
{
|
| 2060 |
+
"a": Series([1, np.nan, 3], dtype="Int64"),
|
| 2061 |
+
"b": Series([1, 2, 3], dtype="Int64"),
|
| 2062 |
+
"c": Series([1.5, np.nan, 2.5], dtype="Float64"),
|
| 2063 |
+
"d": Series([1.5, 2.0, 2.5], dtype="Float64"),
|
| 2064 |
+
"e": Series([True, False, NA], dtype="boolean"),
|
| 2065 |
+
"f": Series([True, False, True], dtype="boolean"),
|
| 2066 |
+
"g": Series(["a", "b", "c"], dtype=string_dtype),
|
| 2067 |
+
"h": Series(["a", "b", None], dtype=string_dtype),
|
| 2068 |
+
}
|
| 2069 |
+
)
|
| 2070 |
+
|
| 2071 |
+
if dtype_backend == "pyarrow":
|
| 2072 |
+
pa = pytest.importorskip("pyarrow")
|
| 2073 |
+
from pandas.arrays import ArrowExtensionArray
|
| 2074 |
+
|
| 2075 |
+
expected = DataFrame(
|
| 2076 |
+
{
|
| 2077 |
+
col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
|
| 2078 |
+
for col in expected.columns
|
| 2079 |
+
}
|
| 2080 |
+
)
|
| 2081 |
+
|
| 2082 |
+
if orient == "values":
|
| 2083 |
+
expected.columns = list(range(8))
|
| 2084 |
+
|
| 2085 |
+
# the storage of the str columns' Index is also affected by the
|
| 2086 |
+
# string_storage setting -> ignore that for checking the result
|
| 2087 |
+
tm.assert_frame_equal(result, expected, check_column_type=False)
|
| 2088 |
+
|
| 2089 |
+
@pytest.mark.parametrize("orient", ["split", "records", "index"])
|
| 2090 |
+
def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
|
| 2091 |
+
# GH#50750
|
| 2092 |
+
pa = pytest.importorskip("pyarrow")
|
| 2093 |
+
ser = Series([1, np.nan, 3], dtype="Int64")
|
| 2094 |
+
|
| 2095 |
+
out = ser.to_json(orient=orient)
|
| 2096 |
+
with pd.option_context("mode.string_storage", string_storage):
|
| 2097 |
+
result = read_json(
|
| 2098 |
+
StringIO(out), dtype_backend=dtype_backend, orient=orient, typ="series"
|
| 2099 |
+
)
|
| 2100 |
+
|
| 2101 |
+
expected = Series([1, np.nan, 3], dtype="Int64")
|
| 2102 |
+
|
| 2103 |
+
if dtype_backend == "pyarrow":
|
| 2104 |
+
from pandas.arrays import ArrowExtensionArray
|
| 2105 |
+
|
| 2106 |
+
expected = Series(ArrowExtensionArray(pa.array(expected, from_pandas=True)))
|
| 2107 |
+
|
| 2108 |
+
tm.assert_series_equal(result, expected)
|
| 2109 |
+
|
| 2110 |
+
def test_invalid_dtype_backend(self):
|
| 2111 |
+
msg = (
|
| 2112 |
+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
|
| 2113 |
+
"'pyarrow' are allowed."
|
| 2114 |
+
)
|
| 2115 |
+
with pytest.raises(ValueError, match=msg):
|
| 2116 |
+
read_json("test", dtype_backend="numpy")
|
| 2117 |
+
|
| 2118 |
+
|
| 2119 |
+
def test_invalid_engine():
|
| 2120 |
+
# GH 48893
|
| 2121 |
+
ser = Series(range(1))
|
| 2122 |
+
out = ser.to_json()
|
| 2123 |
+
with pytest.raises(ValueError, match="The engine type foo"):
|
| 2124 |
+
read_json(out, engine="foo")
|
| 2125 |
+
|
| 2126 |
+
|
| 2127 |
+
def test_pyarrow_engine_lines_false():
|
| 2128 |
+
# GH 48893
|
| 2129 |
+
ser = Series(range(1))
|
| 2130 |
+
out = ser.to_json()
|
| 2131 |
+
with pytest.raises(ValueError, match="currently pyarrow engine only supports"):
|
| 2132 |
+
read_json(out, engine="pyarrow", lines=False)
|
| 2133 |
+
|
| 2134 |
+
|
| 2135 |
+
def test_json_roundtrip_string_inference(orient):
|
| 2136 |
+
df = DataFrame(
|
| 2137 |
+
[["a", "b"], ["c", "d"]], index=["row 1", "row 2"], columns=["col 1", "col 2"]
|
| 2138 |
+
)
|
| 2139 |
+
out = df.to_json()
|
| 2140 |
+
with pd.option_context("future.infer_string", True):
|
| 2141 |
+
result = read_json(StringIO(out))
|
| 2142 |
+
dtype = pd.StringDtype(na_value=np.nan)
|
| 2143 |
+
expected = DataFrame(
|
| 2144 |
+
[["a", "b"], ["c", "d"]],
|
| 2145 |
+
dtype=dtype,
|
| 2146 |
+
index=Index(["row 1", "row 2"], dtype=dtype),
|
| 2147 |
+
columns=Index(["col 1", "col 2"], dtype=dtype),
|
| 2148 |
+
)
|
| 2149 |
+
tm.assert_frame_equal(result, expected)
|
| 2150 |
+
|
| 2151 |
+
|
| 2152 |
+
def test_json_pos_args_deprecation():
|
| 2153 |
+
# GH-54229
|
| 2154 |
+
df = DataFrame({"a": [1, 2, 3]})
|
| 2155 |
+
msg = (
|
| 2156 |
+
r"Starting with pandas version 3.0 all arguments of to_json except for the "
|
| 2157 |
+
r"argument 'path_or_buf' will be keyword-only."
|
| 2158 |
+
)
|
| 2159 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 2160 |
+
buf = BytesIO()
|
| 2161 |
+
df.to_json(buf, "split")
|
| 2162 |
+
|
| 2163 |
+
|
| 2164 |
+
@td.skip_if_no("pyarrow")
|
| 2165 |
+
def test_to_json_ea_null():
|
| 2166 |
+
# GH#57224
|
| 2167 |
+
df = DataFrame(
|
| 2168 |
+
{
|
| 2169 |
+
"a": Series([1, NA], dtype="int64[pyarrow]"),
|
| 2170 |
+
"b": Series([2, NA], dtype="Int64"),
|
| 2171 |
+
}
|
| 2172 |
+
)
|
| 2173 |
+
result = df.to_json(orient="records", lines=True)
|
| 2174 |
+
expected = """{"a":1,"b":2}
|
| 2175 |
+
{"a":null,"b":null}
|
| 2176 |
+
"""
|
| 2177 |
+
assert result == expected
|
| 2178 |
+
|
| 2179 |
+
|
| 2180 |
+
def test_read_json_lines_rangeindex():
|
| 2181 |
+
# GH 57429
|
| 2182 |
+
data = """
|
| 2183 |
+
{"a": 1, "b": 2}
|
| 2184 |
+
{"a": 3, "b": 4}
|
| 2185 |
+
"""
|
| 2186 |
+
result = read_json(StringIO(data), lines=True).index
|
| 2187 |
+
expected = RangeIndex(2)
|
| 2188 |
+
tm.assert_index_equal(result, expected, exact=True)
|
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_ujson.py
ADDED
|
@@ -0,0 +1,1087 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import calendar
|
| 2 |
+
import datetime
|
| 3 |
+
import decimal
|
| 4 |
+
import json
|
| 5 |
+
import locale
|
| 6 |
+
import math
|
| 7 |
+
import re
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
import dateutil
|
| 11 |
+
import numpy as np
|
| 12 |
+
import pytest
|
| 13 |
+
import pytz
|
| 14 |
+
|
| 15 |
+
import pandas._libs.json as ujson
|
| 16 |
+
from pandas.compat import IS64
|
| 17 |
+
|
| 18 |
+
from pandas import (
|
| 19 |
+
DataFrame,
|
| 20 |
+
DatetimeIndex,
|
| 21 |
+
Index,
|
| 22 |
+
NaT,
|
| 23 |
+
PeriodIndex,
|
| 24 |
+
Series,
|
| 25 |
+
Timedelta,
|
| 26 |
+
Timestamp,
|
| 27 |
+
date_range,
|
| 28 |
+
)
|
| 29 |
+
import pandas._testing as tm
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _clean_dict(d):
|
| 33 |
+
"""
|
| 34 |
+
Sanitize dictionary for JSON by converting all keys to strings.
|
| 35 |
+
|
| 36 |
+
Parameters
|
| 37 |
+
----------
|
| 38 |
+
d : dict
|
| 39 |
+
The dictionary to convert.
|
| 40 |
+
|
| 41 |
+
Returns
|
| 42 |
+
-------
|
| 43 |
+
cleaned_dict : dict
|
| 44 |
+
"""
|
| 45 |
+
return {str(k): v for k, v in d.items()}
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@pytest.fixture(
|
| 49 |
+
params=[None, "split", "records", "values", "index"] # Column indexed by default.
|
| 50 |
+
)
|
| 51 |
+
def orient(request):
|
| 52 |
+
return request.param
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class TestUltraJSONTests:
|
| 56 |
+
@pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
|
| 57 |
+
def test_encode_decimal(self):
|
| 58 |
+
sut = decimal.Decimal("1337.1337")
|
| 59 |
+
encoded = ujson.ujson_dumps(sut, double_precision=15)
|
| 60 |
+
decoded = ujson.ujson_loads(encoded)
|
| 61 |
+
assert decoded == 1337.1337
|
| 62 |
+
|
| 63 |
+
sut = decimal.Decimal("0.95")
|
| 64 |
+
encoded = ujson.ujson_dumps(sut, double_precision=1)
|
| 65 |
+
assert encoded == "1.0"
|
| 66 |
+
|
| 67 |
+
decoded = ujson.ujson_loads(encoded)
|
| 68 |
+
assert decoded == 1.0
|
| 69 |
+
|
| 70 |
+
sut = decimal.Decimal("0.94")
|
| 71 |
+
encoded = ujson.ujson_dumps(sut, double_precision=1)
|
| 72 |
+
assert encoded == "0.9"
|
| 73 |
+
|
| 74 |
+
decoded = ujson.ujson_loads(encoded)
|
| 75 |
+
assert decoded == 0.9
|
| 76 |
+
|
| 77 |
+
sut = decimal.Decimal("1.95")
|
| 78 |
+
encoded = ujson.ujson_dumps(sut, double_precision=1)
|
| 79 |
+
assert encoded == "2.0"
|
| 80 |
+
|
| 81 |
+
decoded = ujson.ujson_loads(encoded)
|
| 82 |
+
assert decoded == 2.0
|
| 83 |
+
|
| 84 |
+
sut = decimal.Decimal("-1.95")
|
| 85 |
+
encoded = ujson.ujson_dumps(sut, double_precision=1)
|
| 86 |
+
assert encoded == "-2.0"
|
| 87 |
+
|
| 88 |
+
decoded = ujson.ujson_loads(encoded)
|
| 89 |
+
assert decoded == -2.0
|
| 90 |
+
|
| 91 |
+
sut = decimal.Decimal("0.995")
|
| 92 |
+
encoded = ujson.ujson_dumps(sut, double_precision=2)
|
| 93 |
+
assert encoded == "1.0"
|
| 94 |
+
|
| 95 |
+
decoded = ujson.ujson_loads(encoded)
|
| 96 |
+
assert decoded == 1.0
|
| 97 |
+
|
| 98 |
+
sut = decimal.Decimal("0.9995")
|
| 99 |
+
encoded = ujson.ujson_dumps(sut, double_precision=3)
|
| 100 |
+
assert encoded == "1.0"
|
| 101 |
+
|
| 102 |
+
decoded = ujson.ujson_loads(encoded)
|
| 103 |
+
assert decoded == 1.0
|
| 104 |
+
|
| 105 |
+
sut = decimal.Decimal("0.99999999999999944")
|
| 106 |
+
encoded = ujson.ujson_dumps(sut, double_precision=15)
|
| 107 |
+
assert encoded == "1.0"
|
| 108 |
+
|
| 109 |
+
decoded = ujson.ujson_loads(encoded)
|
| 110 |
+
assert decoded == 1.0
|
| 111 |
+
|
| 112 |
+
@pytest.mark.parametrize("ensure_ascii", [True, False])
|
| 113 |
+
def test_encode_string_conversion(self, ensure_ascii):
|
| 114 |
+
string_input = "A string \\ / \b \f \n \r \t </script> &"
|
| 115 |
+
not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"'
|
| 116 |
+
html_encoded = (
|
| 117 |
+
'"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"'
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
def helper(expected_output, **encode_kwargs):
|
| 121 |
+
output = ujson.ujson_dumps(
|
| 122 |
+
string_input, ensure_ascii=ensure_ascii, **encode_kwargs
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
assert output == expected_output
|
| 126 |
+
assert string_input == json.loads(output)
|
| 127 |
+
assert string_input == ujson.ujson_loads(output)
|
| 128 |
+
|
| 129 |
+
# Default behavior assumes encode_html_chars=False.
|
| 130 |
+
helper(not_html_encoded)
|
| 131 |
+
|
| 132 |
+
# Make sure explicit encode_html_chars=False works.
|
| 133 |
+
helper(not_html_encoded, encode_html_chars=False)
|
| 134 |
+
|
| 135 |
+
# Make sure explicit encode_html_chars=True does the encoding.
|
| 136 |
+
helper(html_encoded, encode_html_chars=True)
|
| 137 |
+
|
| 138 |
+
@pytest.mark.parametrize(
|
| 139 |
+
"long_number", [-4342969734183514, -12345678901234.56789012, -528656961.4399388]
|
| 140 |
+
)
|
| 141 |
+
def test_double_long_numbers(self, long_number):
|
| 142 |
+
sut = {"a": long_number}
|
| 143 |
+
encoded = ujson.ujson_dumps(sut, double_precision=15)
|
| 144 |
+
|
| 145 |
+
decoded = ujson.ujson_loads(encoded)
|
| 146 |
+
assert sut == decoded
|
| 147 |
+
|
| 148 |
+
def test_encode_non_c_locale(self):
|
| 149 |
+
lc_category = locale.LC_NUMERIC
|
| 150 |
+
|
| 151 |
+
# We just need one of these locales to work.
|
| 152 |
+
for new_locale in ("it_IT.UTF-8", "Italian_Italy"):
|
| 153 |
+
if tm.can_set_locale(new_locale, lc_category):
|
| 154 |
+
with tm.set_locale(new_locale, lc_category):
|
| 155 |
+
assert ujson.ujson_loads(ujson.ujson_dumps(4.78e60)) == 4.78e60
|
| 156 |
+
assert ujson.ujson_loads("4.78", precise_float=True) == 4.78
|
| 157 |
+
break
|
| 158 |
+
|
| 159 |
+
def test_decimal_decode_test_precise(self):
|
| 160 |
+
sut = {"a": 4.56}
|
| 161 |
+
encoded = ujson.ujson_dumps(sut)
|
| 162 |
+
decoded = ujson.ujson_loads(encoded, precise_float=True)
|
| 163 |
+
assert sut == decoded
|
| 164 |
+
|
| 165 |
+
def test_encode_double_tiny_exponential(self):
|
| 166 |
+
num = 1e-40
|
| 167 |
+
assert num == ujson.ujson_loads(ujson.ujson_dumps(num))
|
| 168 |
+
num = 1e-100
|
| 169 |
+
assert num == ujson.ujson_loads(ujson.ujson_dumps(num))
|
| 170 |
+
num = -1e-45
|
| 171 |
+
assert num == ujson.ujson_loads(ujson.ujson_dumps(num))
|
| 172 |
+
num = -1e-145
|
| 173 |
+
assert np.allclose(num, ujson.ujson_loads(ujson.ujson_dumps(num)))
|
| 174 |
+
|
| 175 |
+
@pytest.mark.parametrize("unicode_key", ["key1", "بن"])
|
| 176 |
+
def test_encode_dict_with_unicode_keys(self, unicode_key):
|
| 177 |
+
unicode_dict = {unicode_key: "value1"}
|
| 178 |
+
assert unicode_dict == ujson.ujson_loads(ujson.ujson_dumps(unicode_dict))
|
| 179 |
+
|
| 180 |
+
@pytest.mark.parametrize(
|
| 181 |
+
"double_input", [math.pi, -math.pi] # Should work with negatives too.
|
| 182 |
+
)
|
| 183 |
+
def test_encode_double_conversion(self, double_input):
|
| 184 |
+
output = ujson.ujson_dumps(double_input)
|
| 185 |
+
assert round(double_input, 5) == round(json.loads(output), 5)
|
| 186 |
+
assert round(double_input, 5) == round(ujson.ujson_loads(output), 5)
|
| 187 |
+
|
| 188 |
+
def test_encode_with_decimal(self):
|
| 189 |
+
decimal_input = 1.0
|
| 190 |
+
output = ujson.ujson_dumps(decimal_input)
|
| 191 |
+
|
| 192 |
+
assert output == "1.0"
|
| 193 |
+
|
| 194 |
+
def test_encode_array_of_nested_arrays(self):
|
| 195 |
+
nested_input = [[[[]]]] * 20
|
| 196 |
+
output = ujson.ujson_dumps(nested_input)
|
| 197 |
+
|
| 198 |
+
assert nested_input == json.loads(output)
|
| 199 |
+
assert nested_input == ujson.ujson_loads(output)
|
| 200 |
+
|
| 201 |
+
def test_encode_array_of_doubles(self):
|
| 202 |
+
doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10
|
| 203 |
+
output = ujson.ujson_dumps(doubles_input)
|
| 204 |
+
|
| 205 |
+
assert doubles_input == json.loads(output)
|
| 206 |
+
assert doubles_input == ujson.ujson_loads(output)
|
| 207 |
+
|
| 208 |
+
def test_double_precision(self):
|
| 209 |
+
double_input = 30.012345678901234
|
| 210 |
+
output = ujson.ujson_dumps(double_input, double_precision=15)
|
| 211 |
+
|
| 212 |
+
assert double_input == json.loads(output)
|
| 213 |
+
assert double_input == ujson.ujson_loads(output)
|
| 214 |
+
|
| 215 |
+
for double_precision in (3, 9):
|
| 216 |
+
output = ujson.ujson_dumps(double_input, double_precision=double_precision)
|
| 217 |
+
rounded_input = round(double_input, double_precision)
|
| 218 |
+
|
| 219 |
+
assert rounded_input == json.loads(output)
|
| 220 |
+
assert rounded_input == ujson.ujson_loads(output)
|
| 221 |
+
|
| 222 |
+
@pytest.mark.parametrize(
|
| 223 |
+
"invalid_val",
|
| 224 |
+
[
|
| 225 |
+
20,
|
| 226 |
+
-1,
|
| 227 |
+
"9",
|
| 228 |
+
None,
|
| 229 |
+
],
|
| 230 |
+
)
|
| 231 |
+
def test_invalid_double_precision(self, invalid_val):
|
| 232 |
+
double_input = 30.12345678901234567890
|
| 233 |
+
expected_exception = ValueError if isinstance(invalid_val, int) else TypeError
|
| 234 |
+
msg = (
|
| 235 |
+
r"Invalid value '.*' for option 'double_precision', max is '15'|"
|
| 236 |
+
r"an integer is required \(got type |"
|
| 237 |
+
r"object cannot be interpreted as an integer"
|
| 238 |
+
)
|
| 239 |
+
with pytest.raises(expected_exception, match=msg):
|
| 240 |
+
ujson.ujson_dumps(double_input, double_precision=invalid_val)
|
| 241 |
+
|
| 242 |
+
def test_encode_string_conversion2(self):
|
| 243 |
+
string_input = "A string \\ / \b \f \n \r \t"
|
| 244 |
+
output = ujson.ujson_dumps(string_input)
|
| 245 |
+
|
| 246 |
+
assert string_input == json.loads(output)
|
| 247 |
+
assert string_input == ujson.ujson_loads(output)
|
| 248 |
+
assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'
|
| 249 |
+
|
| 250 |
+
@pytest.mark.parametrize(
|
| 251 |
+
"unicode_input",
|
| 252 |
+
["Räksmörgås اسامة بن محمد بن عوض بن لادن", "\xe6\x97\xa5\xd1\x88"],
|
| 253 |
+
)
|
| 254 |
+
def test_encode_unicode_conversion(self, unicode_input):
|
| 255 |
+
enc = ujson.ujson_dumps(unicode_input)
|
| 256 |
+
dec = ujson.ujson_loads(enc)
|
| 257 |
+
|
| 258 |
+
assert enc == json.dumps(unicode_input)
|
| 259 |
+
assert dec == json.loads(enc)
|
| 260 |
+
|
| 261 |
+
def test_encode_control_escaping(self):
|
| 262 |
+
escaped_input = "\x19"
|
| 263 |
+
enc = ujson.ujson_dumps(escaped_input)
|
| 264 |
+
dec = ujson.ujson_loads(enc)
|
| 265 |
+
|
| 266 |
+
assert escaped_input == dec
|
| 267 |
+
assert enc == json.dumps(escaped_input)
|
| 268 |
+
|
| 269 |
+
def test_encode_unicode_surrogate_pair(self):
|
| 270 |
+
surrogate_input = "\xf0\x90\x8d\x86"
|
| 271 |
+
enc = ujson.ujson_dumps(surrogate_input)
|
| 272 |
+
dec = ujson.ujson_loads(enc)
|
| 273 |
+
|
| 274 |
+
assert enc == json.dumps(surrogate_input)
|
| 275 |
+
assert dec == json.loads(enc)
|
| 276 |
+
|
| 277 |
+
def test_encode_unicode_4bytes_utf8(self):
|
| 278 |
+
four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL"
|
| 279 |
+
enc = ujson.ujson_dumps(four_bytes_input)
|
| 280 |
+
dec = ujson.ujson_loads(enc)
|
| 281 |
+
|
| 282 |
+
assert enc == json.dumps(four_bytes_input)
|
| 283 |
+
assert dec == json.loads(enc)
|
| 284 |
+
|
| 285 |
+
def test_encode_unicode_4bytes_utf8highest(self):
|
| 286 |
+
four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL"
|
| 287 |
+
enc = ujson.ujson_dumps(four_bytes_input)
|
| 288 |
+
|
| 289 |
+
dec = ujson.ujson_loads(enc)
|
| 290 |
+
|
| 291 |
+
assert enc == json.dumps(four_bytes_input)
|
| 292 |
+
assert dec == json.loads(enc)
|
| 293 |
+
|
| 294 |
+
def test_encode_unicode_error(self):
    """Encoding a lone surrogate raises ``UnicodeEncodeError``."""
    lone_surrogate = "'\udac0'"
    msg = (
        r"'utf-8' codec can't encode character '\\udac0' "
        r"in position 1: surrogates not allowed"
    )
    payload = [lone_surrogate]
    with pytest.raises(UnicodeEncodeError, match=msg):
        ujson.ujson_dumps(payload)
|
| 302 |
+
|
| 303 |
+
def test_encode_array_in_array(self):
|
| 304 |
+
arr_in_arr_input = [[[[]]]]
|
| 305 |
+
output = ujson.ujson_dumps(arr_in_arr_input)
|
| 306 |
+
|
| 307 |
+
assert arr_in_arr_input == json.loads(output)
|
| 308 |
+
assert output == json.dumps(arr_in_arr_input)
|
| 309 |
+
assert arr_in_arr_input == ujson.ujson_loads(output)
|
| 310 |
+
|
| 311 |
+
@pytest.mark.parametrize(
    "num_input",
    [
        31337,
        -31337,  # Negative number.
        -9223372036854775808,  # Large negative number.
    ],
)
def test_encode_num_conversion(self, num_input):
    """Integers encode like stdlib ``json`` and survive a round trip."""
    encoded = ujson.ujson_dumps(num_input)

    assert json.loads(encoded) == num_input
    assert json.dumps(num_input) == encoded
    assert ujson.ujson_loads(encoded) == num_input
|
| 324 |
+
|
| 325 |
+
def test_encode_list_conversion(self):
|
| 326 |
+
list_input = [1, 2, 3, 4]
|
| 327 |
+
output = ujson.ujson_dumps(list_input)
|
| 328 |
+
|
| 329 |
+
assert list_input == json.loads(output)
|
| 330 |
+
assert list_input == ujson.ujson_loads(output)
|
| 331 |
+
|
| 332 |
+
def test_encode_dict_conversion(self):
|
| 333 |
+
dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
|
| 334 |
+
output = ujson.ujson_dumps(dict_input)
|
| 335 |
+
|
| 336 |
+
assert dict_input == json.loads(output)
|
| 337 |
+
assert dict_input == ujson.ujson_loads(output)
|
| 338 |
+
|
| 339 |
+
@pytest.mark.parametrize("builtin_value", [None, True, False])
def test_encode_builtin_values_conversion(self, builtin_value):
    """``None``/``True``/``False`` encode like stdlib ``json`` and round-trip."""
    encoded = ujson.ujson_dumps(builtin_value)

    assert json.loads(encoded) == builtin_value
    assert json.dumps(builtin_value) == encoded
    assert ujson.ujson_loads(encoded) == builtin_value
|
| 345 |
+
|
| 346 |
+
def test_encode_datetime_conversion(self):
|
| 347 |
+
datetime_input = datetime.datetime.fromtimestamp(time.time())
|
| 348 |
+
output = ujson.ujson_dumps(datetime_input, date_unit="s")
|
| 349 |
+
expected = calendar.timegm(datetime_input.utctimetuple())
|
| 350 |
+
|
| 351 |
+
assert int(expected) == json.loads(output)
|
| 352 |
+
assert int(expected) == ujson.ujson_loads(output)
|
| 353 |
+
|
| 354 |
+
def test_encode_date_conversion(self):
|
| 355 |
+
date_input = datetime.date.fromtimestamp(time.time())
|
| 356 |
+
output = ujson.ujson_dumps(date_input, date_unit="s")
|
| 357 |
+
|
| 358 |
+
tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0)
|
| 359 |
+
expected = calendar.timegm(tup)
|
| 360 |
+
|
| 361 |
+
assert int(expected) == json.loads(output)
|
| 362 |
+
assert int(expected) == ujson.ujson_loads(output)
|
| 363 |
+
|
| 364 |
+
@pytest.mark.parametrize(
    "test",
    [datetime.time(), datetime.time(1, 2, 3), datetime.time(10, 12, 15, 343243)],
)
def test_encode_time_conversion_basic(self, test):
    """A ``datetime.time`` is written as its quoted ISO-format string."""
    quoted_iso = f'"{test.isoformat()}"'
    assert quoted_iso == ujson.ujson_dumps(test)
|
| 372 |
+
|
| 373 |
+
def test_encode_time_conversion_pytz(self):
    """A pytz-aware ``datetime.time`` is written as its quoted ISO string."""
    # see gh-11473: to_json segfaults with timezone-aware datetimes
    aware_time = datetime.time(10, 12, 15, 343243, pytz.utc)
    quoted_iso = f'"{aware_time.isoformat()}"'
    assert quoted_iso == ujson.ujson_dumps(aware_time)
|
| 379 |
+
|
| 380 |
+
def test_encode_time_conversion_dateutil(self):
    """A dateutil-aware ``datetime.time`` is written as its quoted ISO string."""
    # see gh-11473: to_json segfaults with timezone-aware datetimes
    aware_time = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
    quoted_iso = f'"{aware_time.isoformat()}"'
    assert quoted_iso == ujson.ujson_dumps(aware_time)
|
| 386 |
+
|
| 387 |
+
@pytest.mark.parametrize(
    "decoded_input", [NaT, np.datetime64("NaT"), np.nan, np.inf, -np.inf]
)
def test_encode_as_null(self, decoded_input):
    """NaT, NaN and infinities are all written as JSON ``null``."""
    encoded = ujson.ujson_dumps(decoded_input)
    assert encoded == "null", "Expected null"
|
| 392 |
+
|
| 393 |
+
def test_datetime_units(self):
    """Each supported ``date_unit`` scales the epoch value; others raise."""
    val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
    stamp = Timestamp(val).as_unit("ns")

    # Divisor that converts the nanosecond value into each unit.
    unit_divisors = (("s", 10**9), ("ms", 10**6), ("us", 10**3), ("ns", 1))
    for unit, divisor in unit_divisors:
        roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit=unit))
        assert roundtrip == stamp._value // divisor

    msg = "Invalid value 'foo' for option 'date_unit'"
    with pytest.raises(ValueError, match=msg):
        ujson.ujson_dumps(val, date_unit="foo")
|
| 412 |
+
|
| 413 |
+
def test_encode_to_utf8(self):
|
| 414 |
+
unencoded = "\xe6\x97\xa5\xd1\x88"
|
| 415 |
+
|
| 416 |
+
enc = ujson.ujson_dumps(unencoded, ensure_ascii=False)
|
| 417 |
+
dec = ujson.ujson_loads(enc)
|
| 418 |
+
|
| 419 |
+
assert enc == json.dumps(unencoded, ensure_ascii=False)
|
| 420 |
+
assert dec == json.loads(enc)
|
| 421 |
+
|
| 422 |
+
def test_decode_from_unicode(self):
|
| 423 |
+
unicode_input = '{"obj": 31337}'
|
| 424 |
+
|
| 425 |
+
dec1 = ujson.ujson_loads(unicode_input)
|
| 426 |
+
dec2 = ujson.ujson_loads(str(unicode_input))
|
| 427 |
+
|
| 428 |
+
assert dec1 == dec2
|
| 429 |
+
|
| 430 |
+
def test_encode_recursion_max(self):
    """Objects that reference each other in a cycle raise ``OverflowError``."""
    # 8 is the max recursion depth

    class O2:
        member = 0

    class O1:
        member = 0

    # Build a two-object cycle: outer -> inner -> outer.
    outer = O1()
    inner = O2()
    outer.member = inner
    inner.member = outer

    with pytest.raises(OverflowError, match="Maximum recursion level reached"):
        ujson.ujson_dumps(outer)
|
| 445 |
+
|
| 446 |
+
def test_decode_jibberish(self):
    """Text that is not JSON raises ``ValueError`` when decoded."""
    msg = "Unexpected character found when decoding 'false'"
    with pytest.raises(ValueError, match=msg):
        ujson.ujson_loads("fdsa sda v9sa fdsa")
|
| 451 |
+
|
| 452 |
+
@pytest.mark.parametrize(
    "broken_json",
    [
        "[",  # Broken array start.
        "{",  # Broken object start.
        "]",  # Broken array end.
        "}",  # Broken object end.
    ],
)
def test_decode_broken_json(self, broken_json):
    """A single unbalanced bracket or brace raises ``ValueError``."""
    with pytest.raises(ValueError, match="Expected object or value"):
        ujson.ujson_loads(broken_json)
|
| 465 |
+
|
| 466 |
+
@pytest.mark.parametrize("too_big_char", ["[", "{"])
def test_decode_depth_too_big(self, too_big_char):
    """A very long run of opening brackets hits the decoding depth limit."""
    deeply_nested = too_big_char * (1024 * 1024)
    with pytest.raises(ValueError, match="Reached object decoding depth limit"):
        ujson.ujson_loads(deeply_nested)
|
| 470 |
+
|
| 471 |
+
@pytest.mark.parametrize(
    "bad_string",
    [
        '"TESTING',  # Unterminated.
        '"TESTING\\"',  # Unterminated escape.
        "tru",  # Broken True.
        "fa",  # Broken False.
        "n",  # Broken None.
    ],
)
def test_decode_bad_string(self, bad_string):
    """Truncated strings and literals raise ``ValueError`` when decoded."""
    # Either message is acceptable, depending on the kind of truncation.
    msg = (
        "Unexpected character found when decoding|"
        "Unmatched ''\"' when when decoding 'string'"
    )
    with pytest.raises(ValueError, match=msg):
        ujson.ujson_loads(bad_string)
|
| 488 |
+
|
| 489 |
+
@pytest.mark.parametrize(
    "broken_json, err_msg",
    [
        (
            '{{1337:""}}',
            "Key name of object must be 'string' when decoding 'object'",
        ),
        ('{{"key":"}', "Unmatched ''\"' when when decoding 'string'"),
        ("[[[true", "Unexpected character found when decoding array value (2)"),
    ],
)
def test_decode_broken_json_leak(self, broken_json, err_msg):
    """Decoding the same broken document 1000 times raises every time."""
    attempts = 0
    while attempts < 1000:
        with pytest.raises(ValueError, match=re.escape(err_msg)):
            ujson.ujson_loads(broken_json)
        attempts += 1
|
| 504 |
+
|
| 505 |
+
@pytest.mark.parametrize(
    "invalid_dict",
    [
        "{{{{31337}}}}",  # No key.
        '{{{{"key":}}}}',  # No value.
        '{{{{"key"}}}}',  # No colon or value.
    ],
)
def test_decode_invalid_dict(self, invalid_dict):
    """Malformed nested objects raise ``ValueError`` when decoded."""
    # Any one of these messages is accepted.
    msg = (
        "Key name of object must be 'string' when decoding 'object'|"
        "No ':' found when decoding object value|"
        "Expected object or value"
    )
    with pytest.raises(ValueError, match=msg):
        ujson.ujson_loads(invalid_dict)
|
| 521 |
+
|
| 522 |
+
@pytest.mark.parametrize(
    "numeric_int_as_str", ["31337", "-31337"]  # Should work with negatives.
)
def test_decode_numeric_int(self, numeric_int_as_str):
    """Integer text decodes to the same value as ``int()`` gives."""
    expected = int(numeric_int_as_str)
    assert expected == ujson.ujson_loads(numeric_int_as_str)
|
| 527 |
+
|
| 528 |
+
def test_encode_null_character(self):
|
| 529 |
+
wrapped_input = "31337 \x00 1337"
|
| 530 |
+
output = ujson.ujson_dumps(wrapped_input)
|
| 531 |
+
|
| 532 |
+
assert wrapped_input == json.loads(output)
|
| 533 |
+
assert output == json.dumps(wrapped_input)
|
| 534 |
+
assert wrapped_input == ujson.ujson_loads(output)
|
| 535 |
+
|
| 536 |
+
alone_input = "\x00"
|
| 537 |
+
output = ujson.ujson_dumps(alone_input)
|
| 538 |
+
|
| 539 |
+
assert alone_input == json.loads(output)
|
| 540 |
+
assert output == json.dumps(alone_input)
|
| 541 |
+
assert alone_input == ujson.ujson_loads(output)
|
| 542 |
+
assert '" \\u0000\\r\\n "' == ujson.ujson_dumps(" \u0000\r\n ")
|
| 543 |
+
|
| 544 |
+
def test_decode_null_character(self):
|
| 545 |
+
wrapped_input = '"31337 \\u0000 31337"'
|
| 546 |
+
assert ujson.ujson_loads(wrapped_input) == json.loads(wrapped_input)
|
| 547 |
+
|
| 548 |
+
def test_encode_list_long_conversion(self):
|
| 549 |
+
long_input = [
|
| 550 |
+
9223372036854775807,
|
| 551 |
+
9223372036854775807,
|
| 552 |
+
9223372036854775807,
|
| 553 |
+
9223372036854775807,
|
| 554 |
+
9223372036854775807,
|
| 555 |
+
9223372036854775807,
|
| 556 |
+
]
|
| 557 |
+
output = ujson.ujson_dumps(long_input)
|
| 558 |
+
|
| 559 |
+
assert long_input == json.loads(output)
|
| 560 |
+
assert long_input == ujson.ujson_loads(output)
|
| 561 |
+
|
| 562 |
+
@pytest.mark.parametrize("long_input", [9223372036854775807, 18446744073709551615])
def test_encode_long_conversion(self, long_input):
    """The int64 and uint64 maxima encode like stdlib ``json`` and round-trip."""
    encoded = ujson.ujson_dumps(long_input)

    assert json.loads(encoded) == long_input
    assert json.dumps(long_input) == encoded
    assert ujson.ujson_loads(encoded) == long_input
|
| 569 |
+
|
| 570 |
+
@pytest.mark.parametrize("bigNum", [2**64, -(2**63) - 1])
def test_dumps_ints_larger_than_maxsize(self, bigNum):
    """Out-of-range integers can be dumped but not loaded back.

    Dumping yields the plain decimal text of the integer; loading that text
    is expected to raise ``ValueError``.
    """
    encoding = ujson.ujson_dumps(bigNum)
    assert str(bigNum) == encoding

    with pytest.raises(
        ValueError,
        match="Value is too big|Value is too small",
    ):
        # Call the decoder directly: the call itself must raise, so wrapping
        # it in an ``assert ... == bigNum`` comparison would never be
        # evaluated and would vanish entirely if asserts were stripped.
        ujson.ujson_loads(encoding)
|
| 580 |
+
|
| 581 |
+
@pytest.mark.parametrize(
    "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"]
)
def test_decode_numeric_int_exp(self, int_exp):
    """Exponent notation decodes to the same value as stdlib ``json``."""
    expected = json.loads(int_exp)
    assert ujson.ujson_loads(int_exp) == expected
|
| 586 |
+
|
| 587 |
+
def test_loads_non_str_bytes_raises(self):
    """Passing ``None`` to the decoder raises ``TypeError``."""
    with pytest.raises(
        TypeError, match="a bytes-like object is required, not 'NoneType'"
    ):
        ujson.ujson_loads(None)
|
| 591 |
+
|
| 592 |
+
@pytest.mark.parametrize("val", [3590016419, 2**31, 2**32, (2**32) - 1])
def test_decode_number_with_32bit_sign_bit(self, val):
    """Values around the 32-bit sign-bit boundary decode unchanged."""
    # Test that numbers that fit within 32 bits but would have the
    # sign bit set (2**31 <= x < 2**32) are decoded properly.
    doc = f'{{"id": {val}}}'
    decoded = ujson.ujson_loads(doc)
    assert decoded["id"] == val
|
| 598 |
+
|
| 599 |
+
def test_encode_big_escape(self):
|
| 600 |
+
# Make sure no Exception is raised.
|
| 601 |
+
for _ in range(10):
|
| 602 |
+
base = "\u00e5".encode()
|
| 603 |
+
escape_input = base * 1024 * 1024 * 2
|
| 604 |
+
ujson.ujson_dumps(escape_input)
|
| 605 |
+
|
| 606 |
+
def test_decode_big_escape(self):
|
| 607 |
+
# Make sure no Exception is raised.
|
| 608 |
+
for _ in range(10):
|
| 609 |
+
base = "\u00e5".encode()
|
| 610 |
+
quote = b'"'
|
| 611 |
+
|
| 612 |
+
escape_input = quote + (base * 1024 * 1024 * 2) + quote
|
| 613 |
+
ujson.ujson_loads(escape_input)
|
| 614 |
+
|
| 615 |
+
def test_to_dict(self):
|
| 616 |
+
d = {"key": 31337}
|
| 617 |
+
|
| 618 |
+
class DictTest:
|
| 619 |
+
def toDict(self):
|
| 620 |
+
return d
|
| 621 |
+
|
| 622 |
+
o = DictTest()
|
| 623 |
+
output = ujson.ujson_dumps(o)
|
| 624 |
+
|
| 625 |
+
dec = ujson.ujson_loads(output)
|
| 626 |
+
assert dec == d
|
| 627 |
+
|
| 628 |
+
def test_default_handler(self):
    """Exercise ``default_handler`` with several kinds of handler."""

    class _TestObject:
        def __init__(self, val) -> None:
            self.val = val

        @property
        def recursive_attr(self):
            # Every access returns a brand-new object.
            return _TestObject("recursive_attr")

        def __str__(self) -> str:
            return str(self.val)

    # Without a handler the encoder hits its recursion limit.
    with pytest.raises(OverflowError, match="Maximum recursion level reached"):
        ujson.ujson_dumps(_TestObject("foo"))

    # ``str`` as handler uses the object's ``__str__``.
    assert '"foo"' == ujson.ujson_dumps(_TestObject("foo"), default_handler=str)

    def my_handler(_):
        return "foobar"

    encoded = ujson.ujson_dumps(_TestObject("foo"), default_handler=my_handler)
    assert '"foobar"' == encoded

    def my_handler_raises(_):
        raise TypeError("I raise for anything")

    # An exception raised by the handler propagates to the caller.
    with pytest.raises(TypeError, match="I raise for anything"):
        ujson.ujson_dumps(_TestObject("foo"), default_handler=my_handler_raises)

    def my_int_handler(_):
        return 42

    encoded = ujson.ujson_dumps(_TestObject("foo"), default_handler=my_int_handler)
    assert ujson.ujson_loads(encoded) == 42

    def my_obj_handler(_):
        return datetime.datetime(2013, 2, 3)

    # A handler returning a datetime encodes like the datetime itself.
    direct = ujson.ujson_loads(ujson.ujson_dumps(datetime.datetime(2013, 2, 3)))
    via_handler = ujson.ujson_loads(
        ujson.ujson_dumps(_TestObject("foo"), default_handler=my_obj_handler)
    )
    assert direct == via_handler

    obj_list = [_TestObject("foo"), _TestObject("bar")]
    stdlib_result = json.loads(json.dumps(obj_list, default=str))
    ujson_result = ujson.ujson_loads(
        ujson.ujson_dumps(obj_list, default_handler=str)
    )
    assert stdlib_result == ujson_result
|
| 681 |
+
|
| 682 |
+
def test_encode_object(self):
|
| 683 |
+
class _TestObject:
|
| 684 |
+
def __init__(self, a, b, _c, d) -> None:
|
| 685 |
+
self.a = a
|
| 686 |
+
self.b = b
|
| 687 |
+
self._c = _c
|
| 688 |
+
self.d = d
|
| 689 |
+
|
| 690 |
+
def e(self):
|
| 691 |
+
return 5
|
| 692 |
+
|
| 693 |
+
# JSON keys should be all non-callable non-underscore attributes, see GH-42768
|
| 694 |
+
test_object = _TestObject(a=1, b=2, _c=3, d=4)
|
| 695 |
+
assert ujson.ujson_loads(ujson.ujson_dumps(test_object)) == {
|
| 696 |
+
"a": 1,
|
| 697 |
+
"b": 2,
|
| 698 |
+
"d": 4,
|
| 699 |
+
}
|
| 700 |
+
|
| 701 |
+
def test_ujson__name__(self):
|
| 702 |
+
# GH 52898
|
| 703 |
+
assert ujson.__name__ == "pandas._libs.json"
|
| 704 |
+
|
| 705 |
+
|
| 706 |
+
class TestNumpyJSONTests:
    """Round-trip and error tests for NumPy scalars and arrays."""

    @pytest.mark.parametrize("bool_input", [True, False])
    def test_bool(self, bool_input):
        """A Python bool survives an encode/decode round trip."""
        flag = bool(bool_input)
        assert ujson.ujson_loads(ujson.ujson_dumps(flag)) == flag

    def test_bool_array(self):
        """A boolean array survives an encode/decode round trip."""
        flags = [True, False, True, True, False, True, False, False]
        bool_array = np.array(flags, dtype=bool)
        decoded = ujson.ujson_loads(ujson.ujson_dumps(bool_array))
        output = np.array(decoded, dtype=bool)
        tm.assert_numpy_array_equal(bool_array, output)

    def test_int(self, any_int_numpy_dtype):
        """A NumPy integer scalar of each dtype round-trips."""
        scalar_type = np.dtype(any_int_numpy_dtype).type
        num = scalar_type(1)

        decoded = ujson.ujson_loads(ujson.ujson_dumps(num))
        assert scalar_type(decoded) == num

    def test_int_array(self, any_int_numpy_dtype):
        """An integer array of each dtype round-trips."""
        arr_input = np.arange(100, dtype=int).astype(any_int_numpy_dtype)

        decoded = ujson.ujson_loads(ujson.ujson_dumps(arr_input))
        arr_output = np.array(decoded, dtype=any_int_numpy_dtype)
        tm.assert_numpy_array_equal(arr_input, arr_output)

    def test_int_max(self, any_int_numpy_dtype):
        """The maximum value of each integer dtype round-trips."""
        if any_int_numpy_dtype in ("int64", "uint64") and not IS64:
            pytest.skip("Cannot test 64-bit integer on 32-bit platform")

        scalar_type = np.dtype(any_int_numpy_dtype).type

        # uint64 max will always overflow,
        # as it's encoded to signed.
        limit_dtype = "int64" if any_int_numpy_dtype == "uint64" else any_int_numpy_dtype
        num = np.iinfo(limit_dtype).max

        decoded = ujson.ujson_loads(ujson.ujson_dumps(num))
        assert scalar_type(decoded) == num

    def test_float(self, float_numpy_dtype):
        """A NumPy float scalar of each dtype round-trips."""
        scalar_type = np.dtype(float_numpy_dtype).type
        num = scalar_type(256.2013)

        decoded = ujson.ujson_loads(ujson.ujson_dumps(num))
        assert scalar_type(decoded) == num

    def test_float_array(self, float_numpy_dtype):
        """A float array round-trips approximately at 15 digits precision."""
        float_input = np.arange(12.5, 185.72, 1.7322, dtype=float).astype(
            float_numpy_dtype
        )

        encoded = ujson.ujson_dumps(float_input, double_precision=15)
        float_output = np.array(ujson.ujson_loads(encoded), dtype=float_numpy_dtype)
        tm.assert_almost_equal(float_input, float_output)

    def test_float_max(self, float_numpy_dtype):
        """A tenth of the dtype's maximum round-trips approximately."""
        scalar_type = np.dtype(float_numpy_dtype).type
        num = scalar_type(np.finfo(float_numpy_dtype).max / 10)

        encoded = ujson.ujson_dumps(num, double_precision=15)
        tm.assert_almost_equal(scalar_type(ujson.ujson_loads(encoded)), num)

    def test_array_basic(self):
        """A six-dimensional integer array round-trips."""
        arr = np.arange(96).reshape((2, 2, 2, 2, 3, 2))

        decoded = np.array(ujson.ujson_loads(ujson.ujson_dumps(arr)))
        tm.assert_numpy_array_equal(decoded, arr)

    @pytest.mark.parametrize("shape", [(10, 10), (5, 5, 4), (100, 1)])
    def test_array_reshaped(self, shape):
        """Arrays of several shapes round-trip."""
        arr = np.arange(100).reshape(shape)

        decoded = np.array(ujson.ujson_loads(ujson.ujson_dumps(arr)))
        tm.assert_numpy_array_equal(decoded, arr)

    def test_array_list(self):
        """An object array of mixed Python values round-trips."""
        arr_list = [
            "a",
            [],
            {},
            {},
            [],
            42,
            97.8,
            ["a", "b"],
            {"key": "val"},
        ]
        arr = np.array(arr_list, dtype=object)
        decoded = ujson.ujson_loads(ujson.ujson_dumps(arr))
        result = np.array(decoded, dtype=object)
        tm.assert_numpy_array_equal(result, arr)

    def test_array_float(self):
        """A reshaped float32 array round-trips approximately."""
        dtype = np.float32

        arr = np.arange(100.202, 200.202, 1, dtype=dtype).reshape((5, 5, 4))

        decoded = ujson.ujson_loads(ujson.ujson_dumps(arr))
        arr_out = np.array(decoded, dtype=dtype)
        tm.assert_almost_equal(arr, arr_out)

    def test_0d_array(self):
        """A zero-dimensional array is rejected with ``TypeError``."""
        # gh-18878
        msg = re.escape(
            "array(1) (numpy-scalar) is not JSON serializable at the moment"
        )
        zero_dim = np.array(1)
        with pytest.raises(TypeError, match=msg):
            ujson.ujson_dumps(zero_dim)

    def test_array_long_double(self):
        """A ``np.longdouble`` scalar is rejected with ``TypeError``."""
        msg = re.compile(
            "1234.5.* \\(numpy-scalar\\) is not JSON serializable at the moment"
        )
        long_double = np.longdouble(1234.5)
        with pytest.raises(TypeError, match=msg):
            ujson.ujson_dumps(long_double)
|
| 829 |
+
|
| 830 |
+
|
| 831 |
+
class TestPandasJSONTests:
    """Encoder/decoder tests involving pandas objects and decoder edge cases."""

    def test_dataframe(self, orient):
        """A DataFrame round-trips for the given ``orient`` and keeps its dtype."""
        dtype = np.int64

        df = DataFrame(
            [[1, 2, 3], [4, 5, 6]],
            index=["a", "b"],
            columns=["x", "y", "z"],
            dtype=dtype,
        )
        encode_kwargs = {} if orient is None else {"orient": orient}
        assert (df.dtypes == dtype).all()

        output = ujson.ujson_loads(ujson.ujson_dumps(df, **encode_kwargs))
        # Encoding must not have altered the source frame's dtypes.
        assert (df.dtypes == dtype).all()

        # Ensure proper DataFrame initialization.
        if orient == "split":
            dec = _clean_dict(output)
            output = DataFrame(**dec)
        else:
            output = DataFrame(output)

        # Corrections to enable DataFrame comparison.
        if orient == "values":
            df.columns = [0, 1, 2]
            df.index = [0, 1]
        elif orient == "records":
            df.index = [0, 1]
        elif orient == "index":
            df = df.transpose()

        assert (df.dtypes == dtype).all()
        tm.assert_frame_equal(output, df)

    def test_dataframe_nested(self, orient):
        """DataFrames inside a dict encode the same as when encoded alone."""
        df = DataFrame(
            [[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"]
        )

        nested = {"df1": df, "df2": df.copy()}
        kwargs = {} if orient is None else {"orient": orient}

        exp = {
            "df1": ujson.ujson_loads(ujson.ujson_dumps(df, **kwargs)),
            "df2": ujson.ujson_loads(ujson.ujson_dumps(df, **kwargs)),
        }
        assert ujson.ujson_loads(ujson.ujson_dumps(nested, **kwargs)) == exp

    def test_series(self, orient):
        """A Series round-trips for the given ``orient`` and keeps its dtype."""
        dtype = np.int64
        s = Series(
            [10, 20, 30, 40, 50, 60],
            name="series",
            index=[6, 7, 8, 9, 10, 15],
            dtype=dtype,
        ).sort_values()
        assert s.dtype == dtype

        encode_kwargs = {} if orient is None else {"orient": orient}

        output = ujson.ujson_loads(ujson.ujson_dumps(s, **encode_kwargs))
        # Encoding must not have altered the source series' dtype.
        assert s.dtype == dtype

        if orient == "split":
            dec = _clean_dict(output)
            output = Series(**dec)
        else:
            output = Series(output)

        # Adjust the expected series to what each orient yields after decoding.
        if orient in (None, "index"):
            s.name = None
            output = output.sort_values()
            s.index = ["6", "7", "8", "9", "10", "15"]
        elif orient in ("records", "values"):
            s.name = None
            s.index = [0, 1, 2, 3, 4, 5]

        assert s.dtype == dtype
        tm.assert_series_equal(output, s)

    def test_series_nested(self, orient):
        """Series inside a dict encode the same as when encoded alone."""
        s = Series(
            [10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]
        ).sort_values()
        nested = {"s1": s, "s2": s.copy()}
        kwargs = {} if orient is None else {"orient": orient}

        exp = {
            "s1": ujson.ujson_loads(ujson.ujson_dumps(s, **kwargs)),
            "s2": ujson.ujson_loads(ujson.ujson_dumps(s, **kwargs)),
        }
        assert ujson.ujson_loads(ujson.ujson_dumps(nested, **kwargs)) == exp

    def test_index(self):
        """An Index round-trips with the default and each explicit orient."""
        i = Index([23, 45, 18, 98, 43, 11], name="index")

        # Column indexed.
        output = Index(ujson.ujson_loads(ujson.ujson_dumps(i)), name="index")
        tm.assert_index_equal(i, output)

        # "split" carries the name in the payload, so it is checked as well.
        # (This pair of checks was previously repeated twice verbatim.)
        dec = _clean_dict(ujson.ujson_loads(ujson.ujson_dumps(i, orient="split")))
        output = Index(**dec)

        tm.assert_index_equal(i, output)
        assert i.name == output.name

        output = Index(
            ujson.ujson_loads(ujson.ujson_dumps(i, orient="values")), name="index"
        )
        tm.assert_index_equal(i, output)

        output = Index(
            ujson.ujson_loads(ujson.ujson_dumps(i, orient="records")), name="index"
        )
        tm.assert_index_equal(i, output)

        output = Index(
            ujson.ujson_loads(ujson.ujson_dumps(i, orient="index")), name="index"
        )
        tm.assert_index_equal(i, output)

    def test_datetime_index(self):
        """A DatetimeIndex round-trips alone and as a Series index."""
        date_unit = "ns"

        # freq doesn't round-trip
        rng = DatetimeIndex(list(date_range("1/1/2000", periods=20)), freq=None)
        encoded = ujson.ujson_dumps(rng, date_unit=date_unit)

        decoded = DatetimeIndex(np.array(ujson.ujson_loads(encoded)))
        tm.assert_index_equal(rng, decoded)

        ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
        decoded = Series(ujson.ujson_loads(ujson.ujson_dumps(ts, date_unit=date_unit)))

        # Rebuild the datetime index from the decoded labels before comparing.
        idx_values = decoded.index.values.astype(np.int64)
        decoded.index = DatetimeIndex(idx_values)
        tm.assert_series_equal(ts, decoded)

    @pytest.mark.parametrize(
        "invalid_arr",
        [
            "[31337,]",  # Trailing comma.
            "[,31337]",  # Leading comma.
            "[]]",  # Unmatched bracket.
            "[,]",  # Only comma.
        ],
    )
    def test_decode_invalid_array(self, invalid_arr):
        """Malformed arrays raise ``ValueError`` when decoded."""
        msg = (
            "Expected object or value|Trailing data|"
            "Unexpected character found when decoding array value"
        )
        with pytest.raises(ValueError, match=msg):
            ujson.ujson_loads(invalid_arr)

    @pytest.mark.parametrize("arr", [[], [31337]])
    def test_decode_array(self, arr):
        """The ``str()`` form of a simple list decodes back to the list."""
        assert arr == ujson.ujson_loads(str(arr))

    @pytest.mark.parametrize("extreme_num", [9223372036854775807, -9223372036854775808])
    def test_decode_extreme_numbers(self, extreme_num):
        """The int64 extremes decode to the same integers."""
        assert extreme_num == ujson.ujson_loads(str(extreme_num))

    @pytest.mark.parametrize("too_extreme_num", [f"{2**64}", f"{-2**63-1}"])
    def test_decode_too_extreme_numbers(self, too_extreme_num):
        """Integers just outside the supported range raise ``ValueError``."""
        with pytest.raises(
            ValueError,
            match="Value is too big|Value is too small",
        ):
            ujson.ujson_loads(too_extreme_num)

    def test_decode_with_trailing_whitespaces(self):
        """Whitespace after a complete document is ignored."""
        assert {} == ujson.ujson_loads("{}\n\t ")

    def test_decode_with_trailing_non_whitespaces(self):
        """Non-whitespace after a complete document raises ``ValueError``."""
        with pytest.raises(ValueError, match="Trailing data"):
            ujson.ujson_loads("{}\n\t a")

    @pytest.mark.parametrize("value", [f"{2**64}", f"{-2**63-1}"])
    def test_decode_array_with_big_int(self, value):
        """Out-of-range integer text raises ``ValueError``."""
        with pytest.raises(
            ValueError,
            match="Value is too big|Value is too small",
        ):
            ujson.ujson_loads(value)

    @pytest.mark.parametrize(
        "float_number",
        [
            1.1234567893,
            1.234567893,
            1.34567893,
            1.4567893,
            1.567893,
            1.67893,
            1.7893,
            1.893,
            1.3,
        ],
    )
    @pytest.mark.parametrize("sign", [-1, 1])
    def test_decode_floating_point(self, sign, float_number):
        """Positive and negative floats decode to within ``rtol=1e-15``."""
        float_number *= sign
        tm.assert_almost_equal(
            float_number, ujson.ujson_loads(str(float_number)), rtol=1e-15
        )

    def test_encode_big_set(self):
        """A set of 100000 integers can be encoded."""
        # Build the set directly from the range rather than adding one
        # element at a time.
        s = set(range(100000))

        # Make sure no Exception is raised.
        ujson.ujson_dumps(s)

    def test_encode_empty_set(self):
        """An empty set is written as an empty JSON array."""
        assert "[]" == ujson.ujson_dumps(set())

    def test_encode_set(self):
        """Every decoded element of an encoded set belongs to the set."""
        s = {1, 2, 3, 4, 5, 6, 7, 8, 9}
        enc = ujson.ujson_dumps(s)
        dec = ujson.ujson_loads(enc)

        for v in dec:
            assert v in s

    @pytest.mark.parametrize(
        "td",
        [
            Timedelta(days=366),
            Timedelta(days=-1),
            Timedelta(hours=13, minutes=5, seconds=5),
            Timedelta(hours=13, minutes=20, seconds=30),
            Timedelta(days=-1, nanoseconds=5),
            Timedelta(nanoseconds=1),
            Timedelta(microseconds=1, nanoseconds=1),
            Timedelta(milliseconds=1, microseconds=1, nanoseconds=1),
            Timedelta(milliseconds=999, microseconds=999, nanoseconds=999),
        ],
    )
    def test_encode_timedelta_iso(self, td):
        """With ``iso_dates=True`` a Timedelta is written as its ISO string."""
        # GH 28256
        result = ujson.ujson_dumps(td, iso_dates=True)
        expected = f'"{td.isoformat()}"'

        assert result == expected

    def test_encode_periodindex(self):
        """An empty frame indexed by a PeriodIndex serialises to ``{}``."""
        # GH 46683
        p = PeriodIndex(["2022-04-06", "2022-04-07"], freq="D")
        df = DataFrame(index=p)
        assert df.to_json() == "{}"
|
py311/lib/python3.11/site-packages/pandas/tests/io/parser/test_concatenate_chunks.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.errors import DtypeWarning
|
| 5 |
+
|
| 6 |
+
import pandas._testing as tm
|
| 7 |
+
from pandas.core.arrays import ArrowExtensionArray
|
| 8 |
+
|
| 9 |
+
from pandas.io.parsers.c_parser_wrapper import _concatenate_chunks
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_concatenate_chunks_pyarrow():
|
| 13 |
+
# GH#51876
|
| 14 |
+
pa = pytest.importorskip("pyarrow")
|
| 15 |
+
chunks = [
|
| 16 |
+
{0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
|
| 17 |
+
{0: ArrowExtensionArray(pa.array([1, 2]))},
|
| 18 |
+
]
|
| 19 |
+
result = _concatenate_chunks(chunks)
|
| 20 |
+
expected = ArrowExtensionArray(pa.array([1.5, 2.5, 1.0, 2.0]))
|
| 21 |
+
tm.assert_extension_array_equal(result[0], expected)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def test_concatenate_chunks_pyarrow_strings():
|
| 25 |
+
# GH#51876
|
| 26 |
+
pa = pytest.importorskip("pyarrow")
|
| 27 |
+
chunks = [
|
| 28 |
+
{0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
|
| 29 |
+
{0: ArrowExtensionArray(pa.array(["a", "b"]))},
|
| 30 |
+
]
|
| 31 |
+
with tm.assert_produces_warning(DtypeWarning, match="have mixed types"):
|
| 32 |
+
result = _concatenate_chunks(chunks)
|
| 33 |
+
expected = np.concatenate(
|
| 34 |
+
[np.array([1.5, 2.5], dtype=object), np.array(["a", "b"])]
|
| 35 |
+
)
|
| 36 |
+
tm.assert_numpy_array_equal(result[0], expected)
|
py311/lib/python3.11/site-packages/pandas/tests/io/pytables/test_categorical.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Categorical,
|
| 6 |
+
DataFrame,
|
| 7 |
+
Series,
|
| 8 |
+
_testing as tm,
|
| 9 |
+
concat,
|
| 10 |
+
read_hdf,
|
| 11 |
+
)
|
| 12 |
+
from pandas.tests.io.pytables.common import (
|
| 13 |
+
_maybe_remove,
|
| 14 |
+
ensure_clean_store,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
pytestmark = [pytest.mark.single_cpu]
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def test_categorical(setup_path):
|
| 21 |
+
with ensure_clean_store(setup_path) as store:
|
| 22 |
+
# Basic
|
| 23 |
+
_maybe_remove(store, "s")
|
| 24 |
+
s = Series(
|
| 25 |
+
Categorical(
|
| 26 |
+
["a", "b", "b", "a", "a", "c"],
|
| 27 |
+
categories=["a", "b", "c", "d"],
|
| 28 |
+
ordered=False,
|
| 29 |
+
)
|
| 30 |
+
)
|
| 31 |
+
store.append("s", s, format="table")
|
| 32 |
+
result = store.select("s")
|
| 33 |
+
tm.assert_series_equal(s, result)
|
| 34 |
+
|
| 35 |
+
_maybe_remove(store, "s_ordered")
|
| 36 |
+
s = Series(
|
| 37 |
+
Categorical(
|
| 38 |
+
["a", "b", "b", "a", "a", "c"],
|
| 39 |
+
categories=["a", "b", "c", "d"],
|
| 40 |
+
ordered=True,
|
| 41 |
+
)
|
| 42 |
+
)
|
| 43 |
+
store.append("s_ordered", s, format="table")
|
| 44 |
+
result = store.select("s_ordered")
|
| 45 |
+
tm.assert_series_equal(s, result)
|
| 46 |
+
|
| 47 |
+
_maybe_remove(store, "df")
|
| 48 |
+
df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
|
| 49 |
+
store.append("df", df, format="table")
|
| 50 |
+
result = store.select("df")
|
| 51 |
+
tm.assert_frame_equal(result, df)
|
| 52 |
+
|
| 53 |
+
# Dtypes
|
| 54 |
+
_maybe_remove(store, "si")
|
| 55 |
+
s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")
|
| 56 |
+
store.append("si", s)
|
| 57 |
+
result = store.select("si")
|
| 58 |
+
tm.assert_series_equal(result, s)
|
| 59 |
+
|
| 60 |
+
_maybe_remove(store, "si2")
|
| 61 |
+
s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")
|
| 62 |
+
store.append("si2", s)
|
| 63 |
+
result = store.select("si2")
|
| 64 |
+
tm.assert_series_equal(result, s)
|
| 65 |
+
|
| 66 |
+
# Multiple
|
| 67 |
+
_maybe_remove(store, "df2")
|
| 68 |
+
df2 = df.copy()
|
| 69 |
+
df2["s2"] = Series(list("abcdefg")).astype("category")
|
| 70 |
+
store.append("df2", df2)
|
| 71 |
+
result = store.select("df2")
|
| 72 |
+
tm.assert_frame_equal(result, df2)
|
| 73 |
+
|
| 74 |
+
# Make sure the metadata is OK
|
| 75 |
+
info = store.info()
|
| 76 |
+
assert "/df2 " in info
|
| 77 |
+
# df2._mgr.blocks[0] and df2._mgr.blocks[2] are Categorical
|
| 78 |
+
assert "/df2/meta/values_block_0/meta" in info
|
| 79 |
+
assert "/df2/meta/values_block_2/meta" in info
|
| 80 |
+
|
| 81 |
+
# unordered
|
| 82 |
+
_maybe_remove(store, "s2")
|
| 83 |
+
s = Series(
|
| 84 |
+
Categorical(
|
| 85 |
+
["a", "b", "b", "a", "a", "c"],
|
| 86 |
+
categories=["a", "b", "c", "d"],
|
| 87 |
+
ordered=False,
|
| 88 |
+
)
|
| 89 |
+
)
|
| 90 |
+
store.append("s2", s, format="table")
|
| 91 |
+
result = store.select("s2")
|
| 92 |
+
tm.assert_series_equal(result, s)
|
| 93 |
+
|
| 94 |
+
# Query
|
| 95 |
+
_maybe_remove(store, "df3")
|
| 96 |
+
store.append("df3", df, data_columns=["s"])
|
| 97 |
+
expected = df[df.s.isin(["b", "c"])]
|
| 98 |
+
result = store.select("df3", where=['s in ["b","c"]'])
|
| 99 |
+
tm.assert_frame_equal(result, expected)
|
| 100 |
+
|
| 101 |
+
expected = df[df.s.isin(["b", "c"])]
|
| 102 |
+
result = store.select("df3", where=['s = ["b","c"]'])
|
| 103 |
+
tm.assert_frame_equal(result, expected)
|
| 104 |
+
|
| 105 |
+
expected = df[df.s.isin(["d"])]
|
| 106 |
+
result = store.select("df3", where=['s in ["d"]'])
|
| 107 |
+
tm.assert_frame_equal(result, expected)
|
| 108 |
+
|
| 109 |
+
expected = df[df.s.isin(["f"])]
|
| 110 |
+
result = store.select("df3", where=['s in ["f"]'])
|
| 111 |
+
tm.assert_frame_equal(result, expected)
|
| 112 |
+
|
| 113 |
+
# Appending with same categories is ok
|
| 114 |
+
store.append("df3", df)
|
| 115 |
+
|
| 116 |
+
df = concat([df, df])
|
| 117 |
+
expected = df[df.s.isin(["b", "c"])]
|
| 118 |
+
result = store.select("df3", where=['s in ["b","c"]'])
|
| 119 |
+
tm.assert_frame_equal(result, expected)
|
| 120 |
+
|
| 121 |
+
# Appending must have the same categories
|
| 122 |
+
df3 = df.copy()
|
| 123 |
+
df3["s"] = df3["s"].cat.remove_unused_categories()
|
| 124 |
+
|
| 125 |
+
msg = "cannot append a categorical with different categories to the existing"
|
| 126 |
+
with pytest.raises(ValueError, match=msg):
|
| 127 |
+
store.append("df3", df3)
|
| 128 |
+
|
| 129 |
+
# Remove, and make sure meta data is removed (its a recursive
|
| 130 |
+
# removal so should be).
|
| 131 |
+
result = store.select("df3/meta/s/meta")
|
| 132 |
+
assert result is not None
|
| 133 |
+
store.remove("df3")
|
| 134 |
+
|
| 135 |
+
with pytest.raises(
|
| 136 |
+
KeyError, match="'No object named df3/meta/s/meta in the file'"
|
| 137 |
+
):
|
| 138 |
+
store.select("df3/meta/s/meta")
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def test_categorical_conversion(tmp_path, setup_path):
|
| 142 |
+
# GH13322
|
| 143 |
+
# Check that read_hdf with categorical columns doesn't return rows if
|
| 144 |
+
# where criteria isn't met.
|
| 145 |
+
obsids = ["ESP_012345_6789", "ESP_987654_3210"]
|
| 146 |
+
imgids = ["APF00006np", "APF0001imm"]
|
| 147 |
+
data = [4.3, 9.8]
|
| 148 |
+
|
| 149 |
+
# Test without categories
|
| 150 |
+
df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data})
|
| 151 |
+
|
| 152 |
+
# We are expecting an empty DataFrame matching types of df
|
| 153 |
+
expected = df.iloc[[], :]
|
| 154 |
+
path = tmp_path / setup_path
|
| 155 |
+
df.to_hdf(path, key="df", format="table", data_columns=True)
|
| 156 |
+
result = read_hdf(path, "df", where="obsids=B")
|
| 157 |
+
tm.assert_frame_equal(result, expected)
|
| 158 |
+
|
| 159 |
+
# Test with categories
|
| 160 |
+
df.obsids = df.obsids.astype("category")
|
| 161 |
+
df.imgids = df.imgids.astype("category")
|
| 162 |
+
|
| 163 |
+
# We are expecting an empty DataFrame matching types of df
|
| 164 |
+
expected = df.iloc[[], :]
|
| 165 |
+
path = tmp_path / setup_path
|
| 166 |
+
df.to_hdf(path, key="df", format="table", data_columns=True)
|
| 167 |
+
result = read_hdf(path, "df", where="obsids=B")
|
| 168 |
+
tm.assert_frame_equal(result, expected)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def test_categorical_nan_only_columns(tmp_path, setup_path):
|
| 172 |
+
# GH18413
|
| 173 |
+
# Check that read_hdf with categorical columns with NaN-only values can
|
| 174 |
+
# be read back.
|
| 175 |
+
df = DataFrame(
|
| 176 |
+
{
|
| 177 |
+
"a": ["a", "b", "c", np.nan],
|
| 178 |
+
"b": [np.nan, np.nan, np.nan, np.nan],
|
| 179 |
+
"c": [1, 2, 3, 4],
|
| 180 |
+
"d": Series([None] * 4, dtype=object),
|
| 181 |
+
}
|
| 182 |
+
)
|
| 183 |
+
df["a"] = df.a.astype("category")
|
| 184 |
+
df["b"] = df.b.astype("category")
|
| 185 |
+
df["d"] = df.b.astype("category")
|
| 186 |
+
expected = df
|
| 187 |
+
path = tmp_path / setup_path
|
| 188 |
+
df.to_hdf(path, key="df", format="table", data_columns=True)
|
| 189 |
+
result = read_hdf(path, "df")
|
| 190 |
+
tm.assert_frame_equal(result, expected)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
@pytest.mark.parametrize(
|
| 194 |
+
"where, df, expected",
|
| 195 |
+
[
|
| 196 |
+
('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})),
|
| 197 |
+
('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})),
|
| 198 |
+
],
|
| 199 |
+
)
|
| 200 |
+
def test_convert_value(
|
| 201 |
+
tmp_path, setup_path, where: str, df: DataFrame, expected: DataFrame
|
| 202 |
+
):
|
| 203 |
+
# GH39420
|
| 204 |
+
# Check that read_hdf with categorical columns can filter by where condition.
|
| 205 |
+
df.col = df.col.astype("category")
|
| 206 |
+
max_widths = {"col": 1}
|
| 207 |
+
categorical_values = sorted(df.col.unique())
|
| 208 |
+
expected.col = expected.col.astype("category")
|
| 209 |
+
expected.col = expected.col.cat.set_categories(categorical_values)
|
| 210 |
+
|
| 211 |
+
path = tmp_path / setup_path
|
| 212 |
+
df.to_hdf(path, key="df", format="table", min_itemsize=max_widths)
|
| 213 |
+
result = read_hdf(path, where=where)
|
| 214 |
+
tm.assert_frame_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/io/pytables/test_read.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from contextlib import closing
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pytest
|
| 7 |
+
|
| 8 |
+
from pandas._libs.tslibs import Timestamp
|
| 9 |
+
from pandas.compat import is_platform_windows
|
| 10 |
+
|
| 11 |
+
import pandas as pd
|
| 12 |
+
from pandas import (
|
| 13 |
+
DataFrame,
|
| 14 |
+
HDFStore,
|
| 15 |
+
Index,
|
| 16 |
+
Series,
|
| 17 |
+
_testing as tm,
|
| 18 |
+
date_range,
|
| 19 |
+
read_hdf,
|
| 20 |
+
)
|
| 21 |
+
from pandas.tests.io.pytables.common import (
|
| 22 |
+
_maybe_remove,
|
| 23 |
+
ensure_clean_store,
|
| 24 |
+
)
|
| 25 |
+
from pandas.util import _test_decorators as td
|
| 26 |
+
|
| 27 |
+
from pandas.io.pytables import TableIterator
|
| 28 |
+
|
| 29 |
+
pytestmark = [pytest.mark.single_cpu]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def test_read_missing_key_close_store(tmp_path, setup_path):
|
| 33 |
+
# GH 25766
|
| 34 |
+
path = tmp_path / setup_path
|
| 35 |
+
df = DataFrame({"a": range(2), "b": range(2)})
|
| 36 |
+
df.to_hdf(path, key="k1")
|
| 37 |
+
|
| 38 |
+
with pytest.raises(KeyError, match="'No object named k2 in the file'"):
|
| 39 |
+
read_hdf(path, "k2")
|
| 40 |
+
|
| 41 |
+
# smoke test to test that file is properly closed after
|
| 42 |
+
# read with KeyError before another write
|
| 43 |
+
df.to_hdf(path, key="k2")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def test_read_index_error_close_store(tmp_path, setup_path):
|
| 47 |
+
# GH 25766
|
| 48 |
+
path = tmp_path / setup_path
|
| 49 |
+
df = DataFrame({"A": [], "B": []}, index=[])
|
| 50 |
+
df.to_hdf(path, key="k1")
|
| 51 |
+
|
| 52 |
+
with pytest.raises(IndexError, match=r"list index out of range"):
|
| 53 |
+
read_hdf(path, "k1", stop=0)
|
| 54 |
+
|
| 55 |
+
# smoke test to test that file is properly closed after
|
| 56 |
+
# read with IndexError before another write
|
| 57 |
+
df.to_hdf(path, key="k1")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def test_read_missing_key_opened_store(tmp_path, setup_path):
|
| 61 |
+
# GH 28699
|
| 62 |
+
path = tmp_path / setup_path
|
| 63 |
+
df = DataFrame({"a": range(2), "b": range(2)})
|
| 64 |
+
df.to_hdf(path, key="k1")
|
| 65 |
+
|
| 66 |
+
with HDFStore(path, "r") as store:
|
| 67 |
+
with pytest.raises(KeyError, match="'No object named k2 in the file'"):
|
| 68 |
+
read_hdf(store, "k2")
|
| 69 |
+
|
| 70 |
+
# Test that the file is still open after a KeyError and that we can
|
| 71 |
+
# still read from it.
|
| 72 |
+
read_hdf(store, "k1")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def test_read_column(setup_path):
|
| 76 |
+
df = DataFrame(
|
| 77 |
+
np.random.default_rng(2).standard_normal((10, 4)),
|
| 78 |
+
columns=Index(list("ABCD")),
|
| 79 |
+
index=date_range("2000-01-01", periods=10, freq="B"),
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
with ensure_clean_store(setup_path) as store:
|
| 83 |
+
_maybe_remove(store, "df")
|
| 84 |
+
|
| 85 |
+
# GH 17912
|
| 86 |
+
# HDFStore.select_column should raise a KeyError
|
| 87 |
+
# exception if the key is not a valid store
|
| 88 |
+
with pytest.raises(KeyError, match="No object named df in the file"):
|
| 89 |
+
store.select_column("df", "index")
|
| 90 |
+
|
| 91 |
+
store.append("df", df)
|
| 92 |
+
# error
|
| 93 |
+
with pytest.raises(
|
| 94 |
+
KeyError, match=re.escape("'column [foo] not found in the table'")
|
| 95 |
+
):
|
| 96 |
+
store.select_column("df", "foo")
|
| 97 |
+
|
| 98 |
+
msg = re.escape("select_column() got an unexpected keyword argument 'where'")
|
| 99 |
+
with pytest.raises(TypeError, match=msg):
|
| 100 |
+
store.select_column("df", "index", where=["index>5"])
|
| 101 |
+
|
| 102 |
+
# valid
|
| 103 |
+
result = store.select_column("df", "index")
|
| 104 |
+
tm.assert_almost_equal(result.values, Series(df.index).values)
|
| 105 |
+
assert isinstance(result, Series)
|
| 106 |
+
|
| 107 |
+
# not a data indexable column
|
| 108 |
+
msg = re.escape(
|
| 109 |
+
"column [values_block_0] can not be extracted individually; "
|
| 110 |
+
"it is not data indexable"
|
| 111 |
+
)
|
| 112 |
+
with pytest.raises(ValueError, match=msg):
|
| 113 |
+
store.select_column("df", "values_block_0")
|
| 114 |
+
|
| 115 |
+
# a data column
|
| 116 |
+
df2 = df.copy()
|
| 117 |
+
df2["string"] = "foo"
|
| 118 |
+
store.append("df2", df2, data_columns=["string"])
|
| 119 |
+
result = store.select_column("df2", "string")
|
| 120 |
+
tm.assert_almost_equal(result.values, df2["string"].values)
|
| 121 |
+
|
| 122 |
+
# a data column with NaNs, result excludes the NaNs
|
| 123 |
+
df3 = df.copy()
|
| 124 |
+
df3["string"] = "foo"
|
| 125 |
+
df3.loc[df3.index[4:6], "string"] = np.nan
|
| 126 |
+
store.append("df3", df3, data_columns=["string"])
|
| 127 |
+
result = store.select_column("df3", "string")
|
| 128 |
+
tm.assert_almost_equal(result.values, df3["string"].values)
|
| 129 |
+
|
| 130 |
+
# start/stop
|
| 131 |
+
result = store.select_column("df3", "string", start=2)
|
| 132 |
+
tm.assert_almost_equal(result.values, df3["string"].values[2:])
|
| 133 |
+
|
| 134 |
+
result = store.select_column("df3", "string", start=-2)
|
| 135 |
+
tm.assert_almost_equal(result.values, df3["string"].values[-2:])
|
| 136 |
+
|
| 137 |
+
result = store.select_column("df3", "string", stop=2)
|
| 138 |
+
tm.assert_almost_equal(result.values, df3["string"].values[:2])
|
| 139 |
+
|
| 140 |
+
result = store.select_column("df3", "string", stop=-2)
|
| 141 |
+
tm.assert_almost_equal(result.values, df3["string"].values[:-2])
|
| 142 |
+
|
| 143 |
+
result = store.select_column("df3", "string", start=2, stop=-2)
|
| 144 |
+
tm.assert_almost_equal(result.values, df3["string"].values[2:-2])
|
| 145 |
+
|
| 146 |
+
result = store.select_column("df3", "string", start=-2, stop=2)
|
| 147 |
+
tm.assert_almost_equal(result.values, df3["string"].values[-2:2])
|
| 148 |
+
|
| 149 |
+
# GH 10392 - make sure column name is preserved
|
| 150 |
+
df4 = DataFrame({"A": np.random.default_rng(2).standard_normal(10), "B": "foo"})
|
| 151 |
+
store.append("df4", df4, data_columns=True)
|
| 152 |
+
expected = df4["B"]
|
| 153 |
+
result = store.select_column("df4", "B")
|
| 154 |
+
tm.assert_series_equal(result, expected)
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def test_pytables_native_read(datapath):
|
| 158 |
+
with ensure_clean_store(
|
| 159 |
+
datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"
|
| 160 |
+
) as store:
|
| 161 |
+
d2 = store["detector/readout"]
|
| 162 |
+
assert isinstance(d2, DataFrame)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
@pytest.mark.skipif(is_platform_windows(), reason="native2 read fails oddly on windows")
|
| 166 |
+
def test_pytables_native2_read(datapath):
|
| 167 |
+
with ensure_clean_store(
|
| 168 |
+
datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r"
|
| 169 |
+
) as store:
|
| 170 |
+
str(store)
|
| 171 |
+
d1 = store["detector"]
|
| 172 |
+
assert isinstance(d1, DataFrame)
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def test_legacy_table_fixed_format_read_py2(datapath):
|
| 176 |
+
# GH 24510
|
| 177 |
+
# legacy table with fixed format written in Python 2
|
| 178 |
+
with ensure_clean_store(
|
| 179 |
+
datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r"
|
| 180 |
+
) as store:
|
| 181 |
+
result = store.select("df")
|
| 182 |
+
expected = DataFrame(
|
| 183 |
+
[[1, 2, 3, "D"]],
|
| 184 |
+
columns=["A", "B", "C", "D"],
|
| 185 |
+
index=Index(["ABC"], name="INDEX_NAME"),
|
| 186 |
+
)
|
| 187 |
+
tm.assert_frame_equal(expected, result)
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def test_legacy_table_fixed_format_read_datetime_py2(datapath):
|
| 191 |
+
# GH 31750
|
| 192 |
+
# legacy table with fixed format and datetime64 column written in Python 2
|
| 193 |
+
expected = DataFrame(
|
| 194 |
+
[[Timestamp("2020-02-06T18:00")]],
|
| 195 |
+
columns=["A"],
|
| 196 |
+
index=Index(["date"]),
|
| 197 |
+
dtype="M8[ns]",
|
| 198 |
+
)
|
| 199 |
+
with ensure_clean_store(
|
| 200 |
+
datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"),
|
| 201 |
+
mode="r",
|
| 202 |
+
) as store:
|
| 203 |
+
result = store.select("df")
|
| 204 |
+
tm.assert_frame_equal(expected, result)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def test_legacy_table_read_py2(datapath):
|
| 208 |
+
# issue: 24925
|
| 209 |
+
# legacy table written in Python 2
|
| 210 |
+
with ensure_clean_store(
|
| 211 |
+
datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r"
|
| 212 |
+
) as store:
|
| 213 |
+
result = store.select("table")
|
| 214 |
+
|
| 215 |
+
expected = DataFrame({"a": ["a", "b"], "b": [2, 3]})
|
| 216 |
+
tm.assert_frame_equal(expected, result)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def test_read_hdf_open_store(tmp_path, setup_path, using_infer_string):
|
| 220 |
+
# GH10330
|
| 221 |
+
# No check for non-string path_or-buf, and no test of open store
|
| 222 |
+
df = DataFrame(
|
| 223 |
+
np.random.default_rng(2).random((4, 5)),
|
| 224 |
+
index=list("abcd"),
|
| 225 |
+
columns=list("ABCDE"),
|
| 226 |
+
)
|
| 227 |
+
df.index.name = "letters"
|
| 228 |
+
df = df.set_index(keys="E", append=True)
|
| 229 |
+
|
| 230 |
+
path = tmp_path / setup_path
|
| 231 |
+
if using_infer_string:
|
| 232 |
+
# TODO(infer_string) make this work for string dtype
|
| 233 |
+
msg = "Saving a MultiIndex with an extension dtype is not supported."
|
| 234 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 235 |
+
df.to_hdf(path, key="df", mode="w")
|
| 236 |
+
return
|
| 237 |
+
df.to_hdf(path, key="df", mode="w")
|
| 238 |
+
direct = read_hdf(path, "df")
|
| 239 |
+
with HDFStore(path, mode="r") as store:
|
| 240 |
+
indirect = read_hdf(store, "df")
|
| 241 |
+
tm.assert_frame_equal(direct, indirect)
|
| 242 |
+
assert store.is_open
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
def test_read_hdf_index_not_view(tmp_path, setup_path):
|
| 246 |
+
# GH 37441
|
| 247 |
+
# Ensure that the index of the DataFrame is not a view
|
| 248 |
+
# into the original recarray that pytables reads in
|
| 249 |
+
df = DataFrame(
|
| 250 |
+
np.random.default_rng(2).random((4, 5)),
|
| 251 |
+
index=[0, 1, 2, 3],
|
| 252 |
+
columns=list("ABCDE"),
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
path = tmp_path / setup_path
|
| 256 |
+
df.to_hdf(path, key="df", mode="w", format="table")
|
| 257 |
+
|
| 258 |
+
df2 = read_hdf(path, "df")
|
| 259 |
+
assert df2.index._data.base is None
|
| 260 |
+
tm.assert_frame_equal(df, df2)
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def test_read_hdf_iterator(tmp_path, setup_path):
|
| 264 |
+
df = DataFrame(
|
| 265 |
+
np.random.default_rng(2).random((4, 5)),
|
| 266 |
+
index=list("abcd"),
|
| 267 |
+
columns=list("ABCDE"),
|
| 268 |
+
)
|
| 269 |
+
df.index.name = "letters"
|
| 270 |
+
df = df.set_index(keys="E", append=True)
|
| 271 |
+
|
| 272 |
+
path = tmp_path / setup_path
|
| 273 |
+
df.to_hdf(path, key="df", mode="w", format="t")
|
| 274 |
+
direct = read_hdf(path, "df")
|
| 275 |
+
iterator = read_hdf(path, "df", iterator=True)
|
| 276 |
+
with closing(iterator.store):
|
| 277 |
+
assert isinstance(iterator, TableIterator)
|
| 278 |
+
indirect = next(iterator.__iter__())
|
| 279 |
+
tm.assert_frame_equal(direct, indirect)
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def test_read_nokey(tmp_path, setup_path):
|
| 283 |
+
# GH10443
|
| 284 |
+
df = DataFrame(
|
| 285 |
+
np.random.default_rng(2).random((4, 5)),
|
| 286 |
+
index=list("abcd"),
|
| 287 |
+
columns=list("ABCDE"),
|
| 288 |
+
)
|
| 289 |
+
|
| 290 |
+
# Categorical dtype not supported for "fixed" format. So no need
|
| 291 |
+
# to test with that dtype in the dataframe here.
|
| 292 |
+
path = tmp_path / setup_path
|
| 293 |
+
df.to_hdf(path, key="df", mode="a")
|
| 294 |
+
reread = read_hdf(path)
|
| 295 |
+
tm.assert_frame_equal(df, reread)
|
| 296 |
+
df.to_hdf(path, key="df2", mode="a")
|
| 297 |
+
|
| 298 |
+
msg = "key must be provided when HDF5 file contains multiple datasets."
|
| 299 |
+
with pytest.raises(ValueError, match=msg):
|
| 300 |
+
read_hdf(path)
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def test_read_nokey_table(tmp_path, setup_path):
|
| 304 |
+
# GH13231
|
| 305 |
+
df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})
|
| 306 |
+
|
| 307 |
+
path = tmp_path / setup_path
|
| 308 |
+
df.to_hdf(path, key="df", mode="a", format="table")
|
| 309 |
+
reread = read_hdf(path)
|
| 310 |
+
tm.assert_frame_equal(df, reread)
|
| 311 |
+
df.to_hdf(path, key="df2", mode="a", format="table")
|
| 312 |
+
|
| 313 |
+
msg = "key must be provided when HDF5 file contains multiple datasets."
|
| 314 |
+
with pytest.raises(ValueError, match=msg):
|
| 315 |
+
read_hdf(path)
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def test_read_nokey_empty(tmp_path, setup_path):
|
| 319 |
+
path = tmp_path / setup_path
|
| 320 |
+
store = HDFStore(path)
|
| 321 |
+
store.close()
|
| 322 |
+
msg = re.escape(
|
| 323 |
+
"Dataset(s) incompatible with Pandas data types, not table, or no "
|
| 324 |
+
"datasets found in HDF5 file."
|
| 325 |
+
)
|
| 326 |
+
with pytest.raises(ValueError, match=msg):
|
| 327 |
+
read_hdf(path)
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def test_read_from_pathlib_path(tmp_path, setup_path):
|
| 331 |
+
# GH11773
|
| 332 |
+
expected = DataFrame(
|
| 333 |
+
np.random.default_rng(2).random((4, 5)),
|
| 334 |
+
index=list("abcd"),
|
| 335 |
+
columns=list("ABCDE"),
|
| 336 |
+
)
|
| 337 |
+
filename = tmp_path / setup_path
|
| 338 |
+
path_obj = Path(filename)
|
| 339 |
+
|
| 340 |
+
expected.to_hdf(path_obj, key="df", mode="a")
|
| 341 |
+
actual = read_hdf(path_obj, key="df")
|
| 342 |
+
|
| 343 |
+
tm.assert_frame_equal(expected, actual)
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
@td.skip_if_no("py.path")
|
| 347 |
+
def test_read_from_py_localpath(tmp_path, setup_path):
|
| 348 |
+
# GH11773
|
| 349 |
+
from py.path import local as LocalPath
|
| 350 |
+
|
| 351 |
+
expected = DataFrame(
|
| 352 |
+
np.random.default_rng(2).random((4, 5)),
|
| 353 |
+
index=list("abcd"),
|
| 354 |
+
columns=list("ABCDE"),
|
| 355 |
+
)
|
| 356 |
+
filename = tmp_path / setup_path
|
| 357 |
+
path_obj = LocalPath(filename)
|
| 358 |
+
|
| 359 |
+
expected.to_hdf(path_obj, key="df", mode="a")
|
| 360 |
+
actual = read_hdf(path_obj, key="df")
|
| 361 |
+
|
| 362 |
+
tm.assert_frame_equal(expected, actual)
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
@pytest.mark.parametrize("format", ["fixed", "table"])
|
| 366 |
+
def test_read_hdf_series_mode_r(tmp_path, format, setup_path):
|
| 367 |
+
# GH 16583
|
| 368 |
+
# Tests that reading a Series saved to an HDF file
|
| 369 |
+
# still works if a mode='r' argument is supplied
|
| 370 |
+
series = Series(range(10), dtype=np.float64)
|
| 371 |
+
path = tmp_path / setup_path
|
| 372 |
+
series.to_hdf(path, key="data", format=format)
|
| 373 |
+
result = read_hdf(path, key="data", mode="r")
|
| 374 |
+
tm.assert_series_equal(result, series)
|
| 375 |
+
|
| 376 |
+
|
| 377 |
+
@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
|
| 378 |
+
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
| 379 |
+
def test_read_py2_hdf_file_in_py3(datapath):
|
| 380 |
+
# GH 16781
|
| 381 |
+
|
| 382 |
+
# tests reading a PeriodIndex DataFrame written in Python2 in Python3
|
| 383 |
+
|
| 384 |
+
# the file was generated in Python 2.7 like so:
|
| 385 |
+
#
|
| 386 |
+
# df = DataFrame([1.,2,3], index=pd.PeriodIndex(
|
| 387 |
+
# ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
|
| 388 |
+
# df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')
|
| 389 |
+
|
| 390 |
+
expected = DataFrame(
|
| 391 |
+
[1.0, 2, 3],
|
| 392 |
+
index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"),
|
| 393 |
+
)
|
| 394 |
+
|
| 395 |
+
with ensure_clean_store(
|
| 396 |
+
datapath(
|
| 397 |
+
"io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5"
|
| 398 |
+
),
|
| 399 |
+
mode="r",
|
| 400 |
+
) as store:
|
| 401 |
+
result = store["p"]
|
| 402 |
+
tm.assert_frame_equal(result, expected)
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
def test_read_infer_string(tmp_path, setup_path):
|
| 406 |
+
# GH#54431
|
| 407 |
+
df = DataFrame({"a": ["a", "b", None]})
|
| 408 |
+
path = tmp_path / setup_path
|
| 409 |
+
df.to_hdf(path, key="data", format="table")
|
| 410 |
+
with pd.option_context("future.infer_string", True):
|
| 411 |
+
result = read_hdf(path, key="data", mode="r")
|
| 412 |
+
expected = DataFrame(
|
| 413 |
+
{"a": ["a", "b", None]},
|
| 414 |
+
dtype=pd.StringDtype(na_value=np.nan),
|
| 415 |
+
columns=Index(["a"], dtype=pd.StringDtype(na_value=np.nan)),
|
| 416 |
+
)
|
| 417 |
+
tm.assert_frame_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_constructors.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
Interval,
|
| 5 |
+
Period,
|
| 6 |
+
Timestamp,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TestIntervalConstructors:
    """Error paths of the ``Interval`` constructor."""

    @pytest.mark.parametrize(
        "left, right",
        [
            ("a", "z"),
            (("a", "b"), ("c", "d")),
            (["A", "B"], ["a", "b"]),
            (Interval(0, 1), Interval(1, 2)),
            (Period("2018Q1", freq="Q"), Period("2018Q1", freq="Q")),
        ],
    )
    def test_construct_errors(self, left, right):
        """Each parametrized endpoint pair must be rejected with ValueError."""
        # GH#23013
        with pytest.raises(
            ValueError,
            match="Only numeric, Timestamp and Timedelta endpoints are allowed",
        ):
            Interval(left, right)

    def test_constructor_errors(self):
        """An unknown ``closed`` value and left > right both raise ValueError."""
        bad_calls = [
            ((0, 1), {"closed": "foo"}, "invalid option for 'closed': foo"),
            ((1, 0), {}, "left side of interval must be <= right side"),
        ]
        for args, kwargs, pattern in bad_calls:
            with pytest.raises(ValueError, match=pattern):
                Interval(*args, **kwargs)

    @pytest.mark.parametrize(
        "tz_left, tz_right", [(None, "UTC"), ("UTC", None), ("UTC", "US/Eastern")]
    )
    def test_constructor_errors_tz(self, tz_left, tz_right):
        """Endpoints whose time zones differ must not form an Interval."""
        # GH#18538
        lower = Timestamp("2017-01-01", tz=tz_left)
        upper = Timestamp("2017-01-02", tz=tz_right)

        # One naive and one aware endpoint is expected to fail as a TypeError;
        # two aware endpoints in different zones as a ValueError.
        has_naive_side = any(zone is None for zone in (tz_left, tz_right))
        if has_naive_side:
            expected_exc = TypeError
            pattern = "Cannot compare tz-naive and tz-aware timestamps"
        else:
            expected_exc = ValueError
            pattern = "left and right must have the same time zone"

        with pytest.raises(expected_exc, match=pattern):
            Interval(lower, upper)
|
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_contains.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
Interval,
|
| 5 |
+
Timedelta,
|
| 6 |
+
Timestamp,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TestContains:
|
| 11 |
+
def test_contains(self):
|
| 12 |
+
interval = Interval(0, 1)
|
| 13 |
+
assert 0.5 in interval
|
| 14 |
+
assert 1 in interval
|
| 15 |
+
assert 0 not in interval
|
| 16 |
+
|
| 17 |
+
interval_both = Interval(0, 1, "both")
|
| 18 |
+
assert 0 in interval_both
|
| 19 |
+
assert 1 in interval_both
|
| 20 |
+
|
| 21 |
+
interval_neither = Interval(0, 1, closed="neither")
|
| 22 |
+
assert 0 not in interval_neither
|
| 23 |
+
assert 0.5 in interval_neither
|
| 24 |
+
assert 1 not in interval_neither
|
| 25 |
+
|
| 26 |
+
def test_contains_interval(self, inclusive_endpoints_fixture):
|
| 27 |
+
interval1 = Interval(0, 1, "both")
|
| 28 |
+
interval2 = Interval(0, 1, inclusive_endpoints_fixture)
|
| 29 |
+
assert interval1 in interval1
|
| 30 |
+
assert interval2 in interval2
|
| 31 |
+
assert interval2 in interval1
|
| 32 |
+
assert interval1 not in interval2 or inclusive_endpoints_fixture == "both"
|
| 33 |
+
|
| 34 |
+
def test_contains_infinite_length(self):
|
| 35 |
+
interval1 = Interval(0, 1, "both")
|
| 36 |
+
interval2 = Interval(float("-inf"), float("inf"), "neither")
|
| 37 |
+
assert interval1 in interval2
|
| 38 |
+
assert interval2 not in interval1
|
| 39 |
+
|
| 40 |
+
def test_contains_zero_length(self):
|
| 41 |
+
interval1 = Interval(0, 1, "both")
|
| 42 |
+
interval2 = Interval(-1, -1, "both")
|
| 43 |
+
interval3 = Interval(0.5, 0.5, "both")
|
| 44 |
+
assert interval2 not in interval1
|
| 45 |
+
assert interval3 in interval1
|
| 46 |
+
assert interval2 not in interval3 and interval3 not in interval2
|
| 47 |
+
assert interval1 not in interval2 and interval1 not in interval3
|
| 48 |
+
|
| 49 |
+
@pytest.mark.parametrize(
|
| 50 |
+
"type1",
|
| 51 |
+
[
|
| 52 |
+
(0, 1),
|
| 53 |
+
(Timestamp(2000, 1, 1, 0), Timestamp(2000, 1, 1, 1)),
|
| 54 |
+
(Timedelta("0h"), Timedelta("1h")),
|
| 55 |
+
],
|
| 56 |
+
)
|
| 57 |
+
@pytest.mark.parametrize(
|
| 58 |
+
"type2",
|
| 59 |
+
[
|
| 60 |
+
(0, 1),
|
| 61 |
+
(Timestamp(2000, 1, 1, 0), Timestamp(2000, 1, 1, 1)),
|
| 62 |
+
(Timedelta("0h"), Timedelta("1h")),
|
| 63 |
+
],
|
| 64 |
+
)
|
| 65 |
+
def test_contains_mixed_types(self, type1, type2):
|
| 66 |
+
interval1 = Interval(*type1)
|
| 67 |
+
interval2 = Interval(*type2)
|
| 68 |
+
if type1 == type2:
|
| 69 |
+
assert interval1 in interval2
|
| 70 |
+
else:
|
| 71 |
+
msg = "^'<=' not supported between instances of"
|
| 72 |
+
with pytest.raises(TypeError, match=msg):
|
| 73 |
+
interval1 in interval2
|
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_interval.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Interval,
|
| 6 |
+
Timedelta,
|
| 7 |
+
Timestamp,
|
| 8 |
+
)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.fixture
def interval():
    """Provide ``Interval(0, 1)`` built with the default ``closed`` setting."""
    unit_interval = Interval(0, 1)
    return unit_interval
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TestInterval:
|
| 17 |
+
def test_properties(self, interval):
|
| 18 |
+
assert interval.closed == "right"
|
| 19 |
+
assert interval.left == 0
|
| 20 |
+
assert interval.right == 1
|
| 21 |
+
assert interval.mid == 0.5
|
| 22 |
+
|
| 23 |
+
def test_hash(self, interval):
|
| 24 |
+
# should not raise
|
| 25 |
+
hash(interval)
|
| 26 |
+
|
| 27 |
+
@pytest.mark.parametrize(
|
| 28 |
+
"left, right, expected",
|
| 29 |
+
[
|
| 30 |
+
(0, 5, 5),
|
| 31 |
+
(-2, 5.5, 7.5),
|
| 32 |
+
(10, 10, 0),
|
| 33 |
+
(10, np.inf, np.inf),
|
| 34 |
+
(-np.inf, -5, np.inf),
|
| 35 |
+
(-np.inf, np.inf, np.inf),
|
| 36 |
+
(Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")),
|
| 37 |
+
(Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")),
|
| 38 |
+
(Timedelta("1h10min"), Timedelta("5h5min"), Timedelta("3h55min")),
|
| 39 |
+
(Timedelta("5s"), Timedelta("1h"), Timedelta("59min55s")),
|
| 40 |
+
],
|
| 41 |
+
)
|
| 42 |
+
def test_length(self, left, right, expected):
|
| 43 |
+
# GH 18789
|
| 44 |
+
iv = Interval(left, right)
|
| 45 |
+
result = iv.length
|
| 46 |
+
assert result == expected
|
| 47 |
+
|
| 48 |
+
@pytest.mark.parametrize(
|
| 49 |
+
"left, right, expected",
|
| 50 |
+
[
|
| 51 |
+
("2017-01-01", "2017-01-06", "5 days"),
|
| 52 |
+
("2017-01-01", "2017-01-01 12:00:00", "12 hours"),
|
| 53 |
+
("2017-01-01 12:00", "2017-01-01 12:00:00", "0 days"),
|
| 54 |
+
("2017-01-01 12:01", "2017-01-05 17:31:00", "4 days 5 hours 30 min"),
|
| 55 |
+
],
|
| 56 |
+
)
|
| 57 |
+
@pytest.mark.parametrize("tz", (None, "UTC", "CET", "US/Eastern"))
|
| 58 |
+
def test_length_timestamp(self, tz, left, right, expected):
|
| 59 |
+
# GH 18789
|
| 60 |
+
iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz))
|
| 61 |
+
result = iv.length
|
| 62 |
+
expected = Timedelta(expected)
|
| 63 |
+
assert result == expected
|
| 64 |
+
|
| 65 |
+
@pytest.mark.parametrize(
|
| 66 |
+
"left, right",
|
| 67 |
+
[
|
| 68 |
+
(0, 1),
|
| 69 |
+
(Timedelta("0 days"), Timedelta("1 day")),
|
| 70 |
+
(Timestamp("2018-01-01"), Timestamp("2018-01-02")),
|
| 71 |
+
(
|
| 72 |
+
Timestamp("2018-01-01", tz="US/Eastern"),
|
| 73 |
+
Timestamp("2018-01-02", tz="US/Eastern"),
|
| 74 |
+
),
|
| 75 |
+
],
|
| 76 |
+
)
|
| 77 |
+
def test_is_empty(self, left, right, closed):
|
| 78 |
+
# GH27219
|
| 79 |
+
# non-empty always return False
|
| 80 |
+
iv = Interval(left, right, closed)
|
| 81 |
+
assert iv.is_empty is False
|
| 82 |
+
|
| 83 |
+
# same endpoint is empty except when closed='both' (contains one point)
|
| 84 |
+
iv = Interval(left, left, closed)
|
| 85 |
+
result = iv.is_empty
|
| 86 |
+
expected = closed != "both"
|
| 87 |
+
assert result is expected
|
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_overlaps.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
Interval,
|
| 5 |
+
Timedelta,
|
| 6 |
+
Timestamp,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@pytest.fixture(
    params=[
        (Timedelta("0 days"), Timedelta("1 day")),
        (Timestamp("2018-01-01"), Timedelta("1 day")),
        (0, 1),
    ],
    ids=lambda pair: type(pair[0]).__name__,
)
def start_shift(request):
    """
    Yield a ``(start, shift)`` pair for building intervals: ``shift`` is a
    value that can be added to ``start`` to produce an endpoint. Each case is
    labelled with the type name of its start value.
    """
    start_and_shift = request.param
    return start_and_shift
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class TestOverlaps:
|
| 27 |
+
def test_overlaps_self(self, start_shift, closed):
|
| 28 |
+
start, shift = start_shift
|
| 29 |
+
interval = Interval(start, start + shift, closed)
|
| 30 |
+
assert interval.overlaps(interval)
|
| 31 |
+
|
| 32 |
+
def test_overlaps_nested(self, start_shift, closed, other_closed):
|
| 33 |
+
start, shift = start_shift
|
| 34 |
+
interval1 = Interval(start, start + 3 * shift, other_closed)
|
| 35 |
+
interval2 = Interval(start + shift, start + 2 * shift, closed)
|
| 36 |
+
|
| 37 |
+
# nested intervals should always overlap
|
| 38 |
+
assert interval1.overlaps(interval2)
|
| 39 |
+
|
| 40 |
+
def test_overlaps_disjoint(self, start_shift, closed, other_closed):
|
| 41 |
+
start, shift = start_shift
|
| 42 |
+
interval1 = Interval(start, start + shift, other_closed)
|
| 43 |
+
interval2 = Interval(start + 2 * shift, start + 3 * shift, closed)
|
| 44 |
+
|
| 45 |
+
# disjoint intervals should never overlap
|
| 46 |
+
assert not interval1.overlaps(interval2)
|
| 47 |
+
|
| 48 |
+
def test_overlaps_endpoint(self, start_shift, closed, other_closed):
|
| 49 |
+
start, shift = start_shift
|
| 50 |
+
interval1 = Interval(start, start + shift, other_closed)
|
| 51 |
+
interval2 = Interval(start + shift, start + 2 * shift, closed)
|
| 52 |
+
|
| 53 |
+
# overlap if shared endpoint is closed for both (overlap at a point)
|
| 54 |
+
result = interval1.overlaps(interval2)
|
| 55 |
+
expected = interval1.closed_right and interval2.closed_left
|
| 56 |
+
assert result == expected
|
| 57 |
+
|
| 58 |
+
@pytest.mark.parametrize(
|
| 59 |
+
"other",
|
| 60 |
+
[10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")],
|
| 61 |
+
ids=lambda x: type(x).__name__,
|
| 62 |
+
)
|
| 63 |
+
def test_overlaps_invalid_type(self, other):
|
| 64 |
+
interval = Interval(0, 1)
|
| 65 |
+
msg = f"`other` must be an Interval, got {type(other).__name__}"
|
| 66 |
+
with pytest.raises(TypeError, match=msg):
|
| 67 |
+
interval.overlaps(other)
|
py311/lib/python3.11/site-packages/pandas/tests/scalar/timestamp/test_formats.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
import pprint
|
| 3 |
+
|
| 4 |
+
import dateutil.tz
|
| 5 |
+
import pytest
|
| 6 |
+
import pytz # a test below uses pytz but only inside a `eval` call
|
| 7 |
+
|
| 8 |
+
from pandas import Timestamp
|
| 9 |
+
|
| 10 |
+
# Keyword fields common to the 2019 sample timestamps used by the
# parametrized isoformat cases.
_fields_2019 = {
    "year": 2019,
    "month": 5,
    "day": 18,
    "hour": 15,
    "minute": 17,
    "second": 8,
}

# Microsecond component only, no nanoseconds.
ts_no_ns = Timestamp(**_fields_2019, microsecond=132263)
# Same fields as ``ts_no_ns`` but in year 1.
ts_no_ns_year1 = Timestamp(**{**_fields_2019, "year": 1}, microsecond=132263)
# Microsecond and nanosecond components.
ts_ns = Timestamp(**_fields_2019, microsecond=132263, nanosecond=123)
# Same as ``ts_ns`` with a UTC time zone attached.
ts_ns_tz = Timestamp(**_fields_2019, microsecond=132263, nanosecond=123, tz="UTC")
# Zero microseconds but a non-zero nanosecond component.
ts_no_us = Timestamp(**_fields_2019, microsecond=0, nanosecond=123)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@pytest.mark.parametrize(
    "ts, timespec, expected_iso",
    [
        (ts_no_ns, "auto", "2019-05-18T15:17:08.132263"),
        (ts_no_ns, "seconds", "2019-05-18T15:17:08"),
        (ts_no_ns, "nanoseconds", "2019-05-18T15:17:08.132263000"),
        (ts_no_ns_year1, "seconds", "0001-05-18T15:17:08"),
        (ts_no_ns_year1, "nanoseconds", "0001-05-18T15:17:08.132263000"),
        (ts_ns, "auto", "2019-05-18T15:17:08.132263123"),
        (ts_ns, "hours", "2019-05-18T15"),
        (ts_ns, "minutes", "2019-05-18T15:17"),
        (ts_ns, "seconds", "2019-05-18T15:17:08"),
        (ts_ns, "milliseconds", "2019-05-18T15:17:08.132"),
        (ts_ns, "microseconds", "2019-05-18T15:17:08.132263"),
        (ts_ns, "nanoseconds", "2019-05-18T15:17:08.132263123"),
        (ts_ns_tz, "auto", "2019-05-18T15:17:08.132263123+00:00"),
        (ts_ns_tz, "hours", "2019-05-18T15+00:00"),
        (ts_ns_tz, "minutes", "2019-05-18T15:17+00:00"),
        (ts_ns_tz, "seconds", "2019-05-18T15:17:08+00:00"),
        (ts_ns_tz, "milliseconds", "2019-05-18T15:17:08.132+00:00"),
        (ts_ns_tz, "microseconds", "2019-05-18T15:17:08.132263+00:00"),
        (ts_ns_tz, "nanoseconds", "2019-05-18T15:17:08.132263123+00:00"),
        (ts_no_us, "auto", "2019-05-18T15:17:08.000000123"),
    ],
)
def test_isoformat(ts, timespec, expected_iso):
    """``Timestamp.isoformat`` renders each sample at the requested timespec."""
    rendered = ts.isoformat(timespec=timespec)
    assert rendered == expected_iso
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class TestTimestampRendering:
|
| 91 |
+
timezones = ["UTC", "Asia/Tokyo", "US/Eastern", "dateutil/America/Los_Angeles"]
|
| 92 |
+
|
| 93 |
+
@pytest.mark.parametrize("tz", timezones)
|
| 94 |
+
@pytest.mark.parametrize("freq", ["D", "M", "S", "N"])
|
| 95 |
+
@pytest.mark.parametrize(
|
| 96 |
+
"date", ["2014-03-07", "2014-01-01 09:00", "2014-01-01 00:00:00.000000001"]
|
| 97 |
+
)
|
| 98 |
+
def test_repr(self, date, freq, tz):
|
| 99 |
+
# avoid to match with timezone name
|
| 100 |
+
freq_repr = f"'{freq}'"
|
| 101 |
+
if tz.startswith("dateutil"):
|
| 102 |
+
tz_repr = tz.replace("dateutil", "")
|
| 103 |
+
else:
|
| 104 |
+
tz_repr = tz
|
| 105 |
+
|
| 106 |
+
date_only = Timestamp(date)
|
| 107 |
+
assert date in repr(date_only)
|
| 108 |
+
assert tz_repr not in repr(date_only)
|
| 109 |
+
assert freq_repr not in repr(date_only)
|
| 110 |
+
assert date_only == eval(repr(date_only))
|
| 111 |
+
|
| 112 |
+
date_tz = Timestamp(date, tz=tz)
|
| 113 |
+
assert date in repr(date_tz)
|
| 114 |
+
assert tz_repr in repr(date_tz)
|
| 115 |
+
assert freq_repr not in repr(date_tz)
|
| 116 |
+
assert date_tz == eval(repr(date_tz))
|
| 117 |
+
|
| 118 |
+
def test_repr_utcoffset(self):
|
| 119 |
+
# This can cause the tz field to be populated, but it's redundant to
|
| 120 |
+
# include this information in the date-string.
|
| 121 |
+
date_with_utc_offset = Timestamp("2014-03-13 00:00:00-0400", tz=None)
|
| 122 |
+
assert "2014-03-13 00:00:00-0400" in repr(date_with_utc_offset)
|
| 123 |
+
assert "tzoffset" not in repr(date_with_utc_offset)
|
| 124 |
+
assert "UTC-04:00" in repr(date_with_utc_offset)
|
| 125 |
+
expr = repr(date_with_utc_offset)
|
| 126 |
+
assert date_with_utc_offset == eval(expr)
|
| 127 |
+
|
| 128 |
+
def test_timestamp_repr_pre1900(self):
|
| 129 |
+
# pre-1900
|
| 130 |
+
stamp = Timestamp("1850-01-01", tz="US/Eastern")
|
| 131 |
+
repr(stamp)
|
| 132 |
+
|
| 133 |
+
iso8601 = "1850-01-01 01:23:45.012345"
|
| 134 |
+
stamp = Timestamp(iso8601, tz="US/Eastern")
|
| 135 |
+
result = repr(stamp)
|
| 136 |
+
assert iso8601 in result
|
| 137 |
+
|
| 138 |
+
def test_pprint(self):
|
| 139 |
+
# GH#12622
|
| 140 |
+
nested_obj = {"foo": 1, "bar": [{"w": {"a": Timestamp("2011-01-01")}}] * 10}
|
| 141 |
+
result = pprint.pformat(nested_obj, width=50)
|
| 142 |
+
expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 143 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 144 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 145 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 146 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 147 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 148 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 149 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 150 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
|
| 151 |
+
{'w': {'a': Timestamp('2011-01-01 00:00:00')}}],
|
| 152 |
+
'foo': 1}"""
|
| 153 |
+
assert result == expected
|
| 154 |
+
|
| 155 |
+
def test_to_timestamp_repr_is_code(self):
|
| 156 |
+
zs = [
|
| 157 |
+
Timestamp("99-04-17 00:00:00", tz="UTC"),
|
| 158 |
+
Timestamp("2001-04-17 00:00:00", tz="UTC"),
|
| 159 |
+
Timestamp("2001-04-17 00:00:00", tz="America/Los_Angeles"),
|
| 160 |
+
Timestamp("2001-04-17 00:00:00", tz=None),
|
| 161 |
+
]
|
| 162 |
+
for z in zs:
|
| 163 |
+
assert eval(repr(z)) == z
|
| 164 |
+
|
| 165 |
+
def test_repr_matches_pydatetime_no_tz(self):
|
| 166 |
+
dt_date = datetime(2013, 1, 2)
|
| 167 |
+
assert str(dt_date) == str(Timestamp(dt_date))
|
| 168 |
+
|
| 169 |
+
dt_datetime = datetime(2013, 1, 2, 12, 1, 3)
|
| 170 |
+
assert str(dt_datetime) == str(Timestamp(dt_datetime))
|
| 171 |
+
|
| 172 |
+
dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45)
|
| 173 |
+
assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us))
|
| 174 |
+
|
| 175 |
+
ts_nanos_only = Timestamp(200)
|
| 176 |
+
assert str(ts_nanos_only) == "1970-01-01 00:00:00.000000200"
|
| 177 |
+
|
| 178 |
+
ts_nanos_micros = Timestamp(1200)
|
| 179 |
+
assert str(ts_nanos_micros) == "1970-01-01 00:00:00.000001200"
|
| 180 |
+
|
| 181 |
+
def test_repr_matches_pydatetime_tz_pytz(self):
|
| 182 |
+
dt_date = datetime(2013, 1, 2, tzinfo=pytz.utc)
|
| 183 |
+
assert str(dt_date) == str(Timestamp(dt_date))
|
| 184 |
+
|
| 185 |
+
dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=pytz.utc)
|
| 186 |
+
assert str(dt_datetime) == str(Timestamp(dt_datetime))
|
| 187 |
+
|
| 188 |
+
dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=pytz.utc)
|
| 189 |
+
assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us))
|
| 190 |
+
|
| 191 |
+
def test_repr_matches_pydatetime_tz_dateutil(self):
|
| 192 |
+
utc = dateutil.tz.tzutc()
|
| 193 |
+
|
| 194 |
+
dt_date = datetime(2013, 1, 2, tzinfo=utc)
|
| 195 |
+
assert str(dt_date) == str(Timestamp(dt_date))
|
| 196 |
+
|
| 197 |
+
dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=utc)
|
| 198 |
+
assert str(dt_datetime) == str(Timestamp(dt_datetime))
|
| 199 |
+
|
| 200 |
+
dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=utc)
|
| 201 |
+
assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us))
|
py311/lib/python3.11/site-packages/pandas/tests/scalar/timestamp/test_timezones.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for Timestamp timezone-related methods
|
| 3 |
+
"""
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
from pandas._libs.tslibs import timezones
|
| 7 |
+
|
| 8 |
+
from pandas import Timestamp
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class TestTimestampTZOperations:
|
| 12 |
+
# ------------------------------------------------------------------
|
| 13 |
+
|
| 14 |
+
def test_timestamp_timetz_equivalent_with_datetime_tz(self, tz_naive_fixture):
|
| 15 |
+
# GH21358
|
| 16 |
+
tz = timezones.maybe_get_tz(tz_naive_fixture)
|
| 17 |
+
|
| 18 |
+
stamp = Timestamp("2018-06-04 10:20:30", tz=tz)
|
| 19 |
+
_datetime = datetime(2018, 6, 4, hour=10, minute=20, second=30, tzinfo=tz)
|
| 20 |
+
|
| 21 |
+
result = stamp.timetz()
|
| 22 |
+
expected = _datetime.timetz()
|
| 23 |
+
|
| 24 |
+
assert result == expected
|