JustinTX committed on Apr 19

Commit

2fb2e00

verified ·

1 Parent(s): 75e081b

Add files using upload-large-folder tool

Browse files

Files changed (20) hide show

py311/lib/python3.11/site-packages/pandas/tests/arrays/period/test_astype.py +67 -0
py311/lib/python3.11/site-packages/pandas/tests/arrays/string_/test_string.py +893 -0
py311/lib/python3.11/site-packages/pandas/tests/io/json/__init__.py +0 -0
py311/lib/python3.11/site-packages/pandas/tests/io/json/conftest.py +9 -0
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_compression.py +130 -0
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_deprecated_kwargs.py +21 -0
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +317 -0
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_normalize.py +907 -0
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_pandas.py +2188 -0
py311/lib/python3.11/site-packages/pandas/tests/io/json/test_ujson.py +1087 -0
py311/lib/python3.11/site-packages/pandas/tests/io/parser/test_concatenate_chunks.py +36 -0
py311/lib/python3.11/site-packages/pandas/tests/io/pytables/test_categorical.py +214 -0
py311/lib/python3.11/site-packages/pandas/tests/io/pytables/test_read.py +417 -0
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/__init__.py +0 -0
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_constructors.py +51 -0
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_contains.py +73 -0
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_interval.py +87 -0
py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_overlaps.py +67 -0
py311/lib/python3.11/site-packages/pandas/tests/scalar/timestamp/test_formats.py +201 -0
py311/lib/python3.11/site-packages/pandas/tests/scalar/timestamp/test_timezones.py +24 -0

py311/lib/python3.11/site-packages/pandas/tests/arrays/period/test_astype.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import numpy as np
+import pytest
+from pandas.core.dtypes.dtypes import PeriodDtype
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays import period_array
+@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
+def test_astype_int(dtype):
+    # We choose to ignore the sign and size of integers for
+    # Period/Datetime/Timedelta astype
+    arr = period_array(["2000", "2001", None], freq="D")
+    if np.dtype(dtype) != np.int64:
+        with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
+            arr.astype(dtype)
+        return
+    result = arr.astype(dtype)
+    expected = arr._ndarray.view("i8")
+    tm.assert_numpy_array_equal(result, expected)
+def test_astype_copies():
+    arr = period_array(["2000", "2001", None], freq="D")
+    result = arr.astype(np.int64, copy=False)
+    # Add the `.base`, since we now use `.asi8` which returns a view.
+    # We could maybe override it in PeriodArray to return ._ndarray directly.
+    assert result.base is arr._ndarray
+    result = arr.astype(np.int64, copy=True)
+    assert result is not arr._ndarray
+    tm.assert_numpy_array_equal(result, arr._ndarray.view("i8"))
+def test_astype_categorical():
+    arr = period_array(["2000", "2001", "2001", None], freq="D")
+    result = arr.astype("category")
+    categories = pd.PeriodIndex(["2000", "2001"], freq="D")
+    expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)
+    tm.assert_categorical_equal(result, expected)
+def test_astype_period():
+    arr = period_array(["2000", "2001", None], freq="D")
+    result = arr.astype(PeriodDtype("M"))
+    expected = period_array(["2000", "2001", None], freq="M")
+    tm.assert_period_array_equal(result, expected)
+@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
+def test_astype_datetime(dtype):
+    arr = period_array(["2000", "2001", None], freq="D")
+    # slice off the [ns] so that the regex matches.
+    if dtype == "timedelta64[ns]":
+        with pytest.raises(TypeError, match=dtype[:-4]):
+            arr.astype(dtype)
+    else:
+        # GH#45038 allow period->dt64 because we allow dt64->period
+        result = arr.astype(dtype)
+        expected = pd.DatetimeIndex(["2000", "2001", pd.NaT], dtype=dtype)._data
+        tm.assert_datetime_array_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/arrays/string_/test_string.py ADDED Viewed

	@@ -0,0 +1,893 @@

+"""
+This module tests the functionality of StringArray and ArrowStringArray.
+Tests for the str accessors are in pandas/tests/strings/test_string_array.py
+"""
+import operator
+import numpy as np
+import pytest
+from pandas._config import using_string_dtype
+from pandas.compat import HAS_PYARROW
+from pandas.compat.pyarrow import (
+    pa_version_under12p0,
+    pa_version_under19p0,
+)
+import pandas.util._test_decorators as td
+from pandas.core.dtypes.common import is_dtype_equal
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays.string_ import StringArrayNumpySemantics
+from pandas.core.arrays.string_arrow import (
+    ArrowStringArray,
+    ArrowStringArrayNumpySemantics,
+)
+@pytest.fixture
+def dtype(string_dtype_arguments):
+    """Fixture giving StringDtype from parametrized storage and na_value arguments"""
+    storage, na_value = string_dtype_arguments
+    return pd.StringDtype(storage=storage, na_value=na_value)
+@pytest.fixture
+def dtype2(string_dtype_arguments2):
+    storage, na_value = string_dtype_arguments2
+    return pd.StringDtype(storage=storage, na_value=na_value)
+@pytest.fixture
+def cls(dtype):
+    """Fixture giving array type from parametrized 'dtype'"""
+    return dtype.construct_array_type()
+def string_dtype_highest_priority(dtype1, dtype2):
+    if HAS_PYARROW:
+        DTYPE_HIERARCHY = [
+            pd.StringDtype("python", na_value=np.nan),
+            pd.StringDtype("pyarrow", na_value=np.nan),
+            pd.StringDtype("python", na_value=pd.NA),
+            pd.StringDtype("pyarrow", na_value=pd.NA),
+        ]
+    else:
+        DTYPE_HIERARCHY = [
+            pd.StringDtype("python", na_value=np.nan),
+            pd.StringDtype("python", na_value=pd.NA),
+        ]
+    h1 = DTYPE_HIERARCHY.index(dtype1)
+    h2 = DTYPE_HIERARCHY.index(dtype2)
+    return DTYPE_HIERARCHY[max(h1, h2)]
+def test_dtype_constructor():
+    pytest.importorskip("pyarrow")
+    with tm.assert_produces_warning(FutureWarning):
+        dtype = pd.StringDtype("pyarrow_numpy")
+    assert dtype == pd.StringDtype("pyarrow", na_value=np.nan)
+def test_dtype_equality():
+    pytest.importorskip("pyarrow")
+    dtype1 = pd.StringDtype("python")
+    dtype2 = pd.StringDtype("pyarrow")
+    dtype3 = pd.StringDtype("pyarrow", na_value=np.nan)
+    assert dtype1 == pd.StringDtype("python", na_value=pd.NA)
+    assert dtype1 != dtype2
+    assert dtype1 != dtype3
+    assert dtype2 == pd.StringDtype("pyarrow", na_value=pd.NA)
+    assert dtype2 != dtype1
+    assert dtype2 != dtype3
+    assert dtype3 == pd.StringDtype("pyarrow", na_value=np.nan)
+    assert dtype3 == pd.StringDtype("pyarrow", na_value=float("nan"))
+    assert dtype3 != dtype1
+    assert dtype3 != dtype2
+def test_repr(dtype):
+    df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype=dtype)})
+    if dtype.na_value is np.nan:
+        expected = "     A\n0    a\n1  NaN\n2    b"
+    else:
+        expected = "      A\n0     a\n1  <NA>\n2     b"
+    assert repr(df) == expected
+    if dtype.na_value is np.nan:
+        expected = "0      a\n1    NaN\n2      b\nName: A, dtype: str"
+    else:
+        expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
+    assert repr(df.A) == expected
+    if dtype.storage == "pyarrow" and dtype.na_value is pd.NA:
+        arr_name = "ArrowStringArray"
+        expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
+    elif dtype.storage == "pyarrow" and dtype.na_value is np.nan:
+        arr_name = "ArrowStringArrayNumpySemantics"
+        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: str"
+    elif dtype.storage == "python" and dtype.na_value is np.nan:
+        arr_name = "StringArrayNumpySemantics"
+        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: str"
+    else:
+        arr_name = "StringArray"
+        expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
+    assert repr(df.A.array) == expected
+def test_dtype_repr(dtype):
+    if dtype.storage == "pyarrow":
+        if dtype.na_value is pd.NA:
+            assert repr(dtype) == "string[pyarrow]"
+        else:
+            assert repr(dtype) == "<StringDtype(na_value=nan)>"
+    elif dtype.na_value is pd.NA:
+        assert repr(dtype) == "string[python]"
+    else:
+        assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>"
+def test_none_to_nan(cls, dtype):
+    a = cls._from_sequence(["a", None, "b"], dtype=dtype)
+    assert a[1] is not None
+    assert a[1] is a.dtype.na_value
+def test_setitem_validates(cls, dtype):
+    arr = cls._from_sequence(["a", "b"], dtype=dtype)
+    msg = "Invalid value '10' for dtype 'str"
+    with pytest.raises(TypeError, match=msg):
+        arr[0] = 10
+    msg = "Invalid value for dtype 'str"
+    with pytest.raises(TypeError, match=msg):
+        arr[:] = np.array([1, 2])
+def test_setitem_with_scalar_string(dtype):
+    # is_float_dtype considers some strings, like 'd', to be floats
+    # which can cause issues.
+    arr = pd.array(["a", "c"], dtype=dtype)
+    arr[0] = "d"
+    expected = pd.array(["d", "c"], dtype=dtype)
+    tm.assert_extension_array_equal(arr, expected)
+def test_setitem_with_array_with_missing(dtype):
+    # ensure that when setting with an array of values, we don't mutate the
+    # array `value` in __setitem__(self, key, value)
+    arr = pd.array(["a", "b", "c"], dtype=dtype)
+    value = np.array(["A", None])
+    value_orig = value.copy()
+    arr[[0, 1]] = value
+    expected = pd.array(["A", pd.NA, "c"], dtype=dtype)
+    tm.assert_extension_array_equal(arr, expected)
+    tm.assert_numpy_array_equal(value, value_orig)
+def test_astype_roundtrip(dtype):
+    ser = pd.Series(pd.date_range("2000", periods=12))
+    ser[0] = None
+    casted = ser.astype(dtype)
+    assert is_dtype_equal(casted.dtype, dtype)
+    result = casted.astype("datetime64[ns]")
+    tm.assert_series_equal(result, ser)
+    # GH#38509 same thing for timedelta64
+    ser2 = ser - ser.iloc[-1]
+    casted2 = ser2.astype(dtype)
+    assert is_dtype_equal(casted2.dtype, dtype)
+    result2 = casted2.astype(ser2.dtype)
+    tm.assert_series_equal(result2, ser2)
+def test_add(dtype):
+    a = pd.Series(["a", "b", "c", None, None], dtype=dtype)
+    b = pd.Series(["x", "y", None, "z", None], dtype=dtype)
+    result = a + b
+    expected = pd.Series(["ax", "by", None, None, None], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+    result = a.add(b)
+    tm.assert_series_equal(result, expected)
+    result = a.radd(b)
+    expected = pd.Series(["xa", "yb", None, None, None], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+    result = a.add(b, fill_value="-")
+    expected = pd.Series(["ax", "by", "c-", "-z", None], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+def test_add_2d(dtype, request):
+    if dtype.storage == "pyarrow":
+        reason = "Failed: DID NOT RAISE <class 'ValueError'>"
+        mark = pytest.mark.xfail(raises=None, reason=reason)
+        request.applymarker(mark)
+    a = pd.array(["a", "b", "c"], dtype=dtype)
+    b = np.array([["a", "b", "c"]], dtype=object)
+    with pytest.raises(ValueError, match="3 != 1"):
+        a + b
+    s = pd.Series(a)
+    with pytest.raises(ValueError, match="3 != 1"):
+        s + b
+def test_add_sequence(dtype):
+    a = pd.array(["a", "b", None, None], dtype=dtype)
+    other = ["x", None, "y", None]
+    result = a + other
+    expected = pd.array(["ax", None, None, None], dtype=dtype)
+    tm.assert_extension_array_equal(result, expected)
+    result = other + a
+    expected = pd.array(["xa", None, None, None], dtype=dtype)
+    tm.assert_extension_array_equal(result, expected)
+def test_mul(dtype):
+    a = pd.array(["a", "b", None], dtype=dtype)
+    result = a * 2
+    expected = pd.array(["aa", "bb", None], dtype=dtype)
+    tm.assert_extension_array_equal(result, expected)
+    result = 2 * a
+    tm.assert_extension_array_equal(result, expected)
+@pytest.mark.xfail(reason="GH-28527")
+def test_add_strings(dtype):
+    arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
+    df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
+    assert arr.__add__(df) is NotImplemented
+    result = arr + df
+    expected = pd.DataFrame([["at", "by", "cv", "dw"]]).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+    result = df + arr
+    expected = pd.DataFrame([["ta", "yb", "vc", "wd"]]).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+@pytest.mark.xfail(reason="GH-28527")
+def test_add_frame(dtype):
+    arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
+    df = pd.DataFrame([["x", np.nan, "y", np.nan]])
+    assert arr.__add__(df) is NotImplemented
+    result = arr + df
+    expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+    result = df + arr
+    expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+def test_comparison_methods_scalar(comparison_op, dtype):
+    op_name = f"__{comparison_op.__name__}__"
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    other = "a"
+    result = getattr(a, op_name)(other)
+    if dtype.na_value is np.nan:
+        expected = np.array([getattr(item, op_name)(other) for item in a])
+        if comparison_op == operator.ne:
+            expected[1] = True
+        else:
+            expected[1] = False
+        tm.assert_numpy_array_equal(result, expected.astype(np.bool_))
+    else:
+        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+        expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
+        expected = pd.array(expected, dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
+def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
+    op_name = f"__{comparison_op.__name__}__"
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    result = getattr(a, op_name)(pd.NA)
+    if dtype.na_value is np.nan:
+        if operator.ne == comparison_op:
+            expected = np.array([True, True, True])
+        else:
+            expected = np.array([False, False, False])
+        tm.assert_numpy_array_equal(result, expected)
+    else:
+        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+        expected = pd.array([None, None, None], dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
+        tm.assert_extension_array_equal(result, expected)
+def test_comparison_methods_scalar_not_string(comparison_op, dtype):
+    op_name = f"__{comparison_op.__name__}__"
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    other = 42
+    if op_name not in ["__eq__", "__ne__"]:
+        with pytest.raises(TypeError, match="Invalid comparison|not supported between"):
+            getattr(a, op_name)(other)
+        return
+    result = getattr(a, op_name)(other)
+    if dtype.na_value is np.nan:
+        expected_data = {
+            "__eq__": [False, False, False],
+            "__ne__": [True, True, True],
+        }[op_name]
+        expected = np.array(expected_data)
+        tm.assert_numpy_array_equal(result, expected)
+    else:
+        expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[
+            op_name
+        ]
+        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+        expected = pd.array(expected_data, dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
+def test_comparison_methods_array(comparison_op, dtype, dtype2):
+    op_name = f"__{comparison_op.__name__}__"
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    other = pd.array([None, None, "c"], dtype=dtype2)
+    result = comparison_op(a, other)
+    # ensure operation is commutative
+    result2 = comparison_op(other, a)
+    tm.assert_equal(result, result2)
+    if dtype.na_value is np.nan and dtype2.na_value is np.nan:
+        if operator.ne == comparison_op:
+            expected = np.array([True, True, False])
+        else:
+            expected = np.array([False, False, False])
+            expected[-1] = getattr(other[-1], op_name)(a[-1])
+        tm.assert_numpy_array_equal(result, expected)
+    else:
+        max_dtype = string_dtype_highest_priority(dtype, dtype2)
+        if max_dtype.storage == "python":
+            expected_dtype = "boolean"
+        else:
+            expected_dtype = "bool[pyarrow]"
+        expected = np.full(len(a), fill_value=None, dtype="object")
+        expected[-1] = getattr(other[-1], op_name)(a[-1])
+        expected = pd.array(expected, dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
+@td.skip_if_no("pyarrow")
+def test_comparison_methods_array_arrow_extension(comparison_op, dtype2):
+    # Test pd.ArrowDtype(pa.string()) against other string arrays
+    import pyarrow as pa
+    op_name = f"__{comparison_op.__name__}__"
+    dtype = pd.ArrowDtype(pa.string())
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    other = pd.array([None, None, "c"], dtype=dtype2)
+    result = comparison_op(a, other)
+    # ensure operation is commutative
+    result2 = comparison_op(other, a)
+    tm.assert_equal(result, result2)
+    expected = pd.array([None, None, True], dtype="bool[pyarrow]")
+    expected[-1] = getattr(other[-1], op_name)(a[-1])
+    tm.assert_extension_array_equal(result, expected)
+def test_comparison_methods_list(comparison_op, dtype):
+    op_name = f"__{comparison_op.__name__}__"
+    a = pd.array(["a", None, "c"], dtype=dtype)
+    other = [None, None, "c"]
+    result = comparison_op(a, other)
+    # ensure operation is commutative
+    result2 = comparison_op(other, a)
+    tm.assert_equal(result, result2)
+    if dtype.na_value is np.nan:
+        if operator.ne == comparison_op:
+            expected = np.array([True, True, False])
+        else:
+            expected = np.array([False, False, False])
+            expected[-1] = getattr(other[-1], op_name)(a[-1])
+        tm.assert_numpy_array_equal(result, expected)
+    else:
+        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
+        expected = np.full(len(a), fill_value=None, dtype="object")
+        expected[-1] = getattr(other[-1], op_name)(a[-1])
+        expected = pd.array(expected, dtype=expected_dtype)
+        tm.assert_extension_array_equal(result, expected)
+def test_constructor_raises(cls):
+    if cls is pd.arrays.StringArray:
+        msg = "StringArray requires a sequence of strings or pandas.NA"
+    elif cls is StringArrayNumpySemantics:
+        msg = "StringArrayNumpySemantics requires a sequence of strings or NaN"
+    else:
+        msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArray"
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", "b"], dtype="S1"))
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array([]))
+    if cls is pd.arrays.StringArray or cls is StringArrayNumpySemantics:
+        # GH#45057 np.nan and None do NOT raise, as they are considered valid NAs
+        #  for string dtype
+        cls(np.array(["a", np.nan], dtype=object))
+        cls(np.array(["a", None], dtype=object))
+    else:
+        with pytest.raises(ValueError, match=msg):
+            cls(np.array(["a", np.nan], dtype=object))
+        with pytest.raises(ValueError, match=msg):
+            cls(np.array(["a", None], dtype=object))
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", pd.NaT], dtype=object))
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", np.datetime64("NaT", "ns")], dtype=object))
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", np.timedelta64("NaT", "ns")], dtype=object))
+@pytest.mark.parametrize("na", [np.nan, np.float64("nan"), float("nan"), None, pd.NA])
+def test_constructor_nan_like(na):
+    expected = pd.arrays.StringArray(np.array(["a", pd.NA]))
+    tm.assert_extension_array_equal(
+        pd.arrays.StringArray(np.array(["a", na], dtype="object")), expected
+    )
+@pytest.mark.parametrize("copy", [True, False])
+def test_from_sequence_no_mutate(copy, cls, dtype):
+    nan_arr = np.array(["a", np.nan], dtype=object)
+    expected_input = nan_arr.copy()
+    na_arr = np.array(["a", pd.NA], dtype=object)
+    result = cls._from_sequence(nan_arr, dtype=dtype, copy=copy)
+    if cls in (ArrowStringArray, ArrowStringArrayNumpySemantics):
+        import pyarrow as pa
+        expected = cls(pa.array(na_arr, type=pa.string(), from_pandas=True))
+    elif cls is StringArrayNumpySemantics:
+        expected = cls(nan_arr)
+    else:
+        expected = cls(na_arr)
+    tm.assert_extension_array_equal(result, expected)
+    tm.assert_numpy_array_equal(nan_arr, expected_input)
+def test_astype_int(dtype):
+    arr = pd.array(["1", "2", "3"], dtype=dtype)
+    result = arr.astype("int64")
+    expected = np.array([1, 2, 3], dtype="int64")
+    tm.assert_numpy_array_equal(result, expected)
+    arr = pd.array(["1", pd.NA, "3"], dtype=dtype)
+    if dtype.na_value is np.nan:
+        err = ValueError
+        msg = "cannot convert float NaN to integer"
+    else:
+        err = TypeError
+        msg = (
+            r"int\(\) argument must be a string, a bytes-like "
+            r"object or a( real)? number"
+        )
+    with pytest.raises(err, match=msg):
+        arr.astype("int64")
+def test_astype_nullable_int(dtype):
+    arr = pd.array(["1", pd.NA, "3"], dtype=dtype)
+    result = arr.astype("Int64")
+    expected = pd.array([1, pd.NA, 3], dtype="Int64")
+    tm.assert_extension_array_equal(result, expected)
+def test_astype_float(dtype, any_float_dtype):
+    # Don't compare arrays (37974)
+    ser = pd.Series(["1.1", pd.NA, "3.3"], dtype=dtype)
+    result = ser.astype(any_float_dtype)
+    expected = pd.Series([1.1, np.nan, 3.3], dtype=any_float_dtype)
+    tm.assert_series_equal(result, expected)
+@pytest.mark.parametrize("skipna", [True, False])
+def test_reduce(skipna, dtype):
+    arr = pd.Series(["a", "b", "c"], dtype=dtype)
+    result = arr.sum(skipna=skipna)
+    assert result == "abc"
+@pytest.mark.parametrize("skipna", [True, False])
+def test_reduce_missing(skipna, dtype):
+    arr = pd.Series([None, "a", None, "b", "c", None], dtype=dtype)
+    result = arr.sum(skipna=skipna)
+    if skipna:
+        assert result == "abc"
+    else:
+        assert pd.isna(result)
+@pytest.mark.parametrize("method", ["min", "max"])
+@pytest.mark.parametrize("skipna", [True, False])
+def test_min_max(method, skipna, dtype):
+    arr = pd.Series(["a", "b", "c", None], dtype=dtype)
+    result = getattr(arr, method)(skipna=skipna)
+    if skipna:
+        expected = "a" if method == "min" else "c"
+        assert result == expected
+    else:
+        assert result is arr.dtype.na_value
+@pytest.mark.parametrize("method", ["min", "max"])
+@pytest.mark.parametrize("box", [pd.Series, pd.array])
+def test_min_max_numpy(method, box, dtype, request):
+    if dtype.storage == "pyarrow" and box is pd.array:
+        if box is pd.array:
+            reason = "'<=' not supported between instances of 'str' and 'NoneType'"
+        else:
+            reason = "'ArrowStringArray' object has no attribute 'max'"
+        mark = pytest.mark.xfail(raises=TypeError, reason=reason)
+        request.applymarker(mark)
+    arr = box(["a", "b", "c", None], dtype=dtype)
+    result = getattr(np, method)(arr)
+    expected = "a" if method == "min" else "c"
+    assert result == expected
+def test_fillna_args(dtype):
+    # GH 37987
+    arr = pd.array(["a", pd.NA], dtype=dtype)
+    res = arr.fillna(value="b")
+    expected = pd.array(["a", "b"], dtype=dtype)
+    tm.assert_extension_array_equal(res, expected)
+    res = arr.fillna(value=np.str_("b"))
+    expected = pd.array(["a", "b"], dtype=dtype)
+    tm.assert_extension_array_equal(res, expected)
+    msg = "Invalid value '1' for dtype 'str"
+    with pytest.raises(TypeError, match=msg):
+        arr.fillna(value=1)
+def test_arrow_array(dtype):
+    # protocol added in 0.15.0
+    pa = pytest.importorskip("pyarrow")
+    import pyarrow.compute as pc
+    data = pd.array(["a", "b", "c"], dtype=dtype)
+    arr = pa.array(data)
+    expected = pa.array(list(data), type=pa.large_string(), from_pandas=True)
+    if dtype.storage == "pyarrow" and pa_version_under12p0:
+        expected = pa.chunked_array(expected)
+    if dtype.storage == "python":
+        expected = pc.cast(expected, pa.string())
+    assert arr.equals(expected)
+@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
+def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
+    # roundtrip possible from arrow 1.0.0
+    pa = pytest.importorskip("pyarrow")
+    data = pd.array(["a", "b", None], dtype=dtype)
+    df = pd.DataFrame({"a": data})
+    table = pa.table(df)
+    if dtype.storage == "python":
+        assert table.field("a").type == "string"
+    else:
+        assert table.field("a").type == "large_string"
+    with pd.option_context("string_storage", string_storage):
+        result = table.to_pandas()
+    if dtype.na_value is np.nan and not using_infer_string:
+        assert result["a"].dtype == "object"
+    else:
+        assert isinstance(result["a"].dtype, pd.StringDtype)
+        expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
+        if using_infer_string:
+            expected.columns = expected.columns.astype(
+                pd.StringDtype(string_storage, na_value=np.nan)
+            )
+        tm.assert_frame_equal(result, expected)
+        # ensure the missing value is represented by NA and not np.nan or None
+        assert result.loc[2, "a"] is result["a"].dtype.na_value
+@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
+def test_arrow_from_string(using_infer_string):
+    # not roundtrip,  but starting with pyarrow table without pandas metadata
+    pa = pytest.importorskip("pyarrow")
+    table = pa.table({"a": pa.array(["a", "b", None], type=pa.string())})
+    result = table.to_pandas()
+    if using_infer_string and not pa_version_under19p0:
+        expected = pd.DataFrame({"a": ["a", "b", None]}, dtype="str")
+    else:
+        expected = pd.DataFrame({"a": ["a", "b", None]}, dtype="object")
+    tm.assert_frame_equal(result, expected)
+@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
+def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
+    # GH-41040
+    pa = pytest.importorskip("pyarrow")
+    data = pd.array([], dtype=dtype)
+    df = pd.DataFrame({"a": data})
+    table = pa.table(df)
+    if dtype.storage == "python":
+        assert table.field("a").type == "string"
+    else:
+        assert table.field("a").type == "large_string"
+    # Instantiate the same table with no chunks at all
+    table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
+    with pd.option_context("string_storage", string_storage):
+        result = table.to_pandas()
+    if dtype.na_value is np.nan and not using_string_dtype():
+        assert result["a"].dtype == "object"
+    else:
+        assert isinstance(result["a"].dtype, pd.StringDtype)
+        expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
+        if using_infer_string:
+            expected.columns = expected.columns.astype(
+                pd.StringDtype(string_storage, na_value=np.nan)
+            )
+        tm.assert_frame_equal(result, expected)
+def test_value_counts_na(dtype):
+    if dtype.na_value is np.nan:
+        exp_dtype = "int64"
+    elif dtype.storage == "pyarrow":
+        exp_dtype = "int64[pyarrow]"
+    else:
+        exp_dtype = "Int64"
+    arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)
+    result = arr.value_counts(dropna=False)
+    expected = pd.Series([2, 1, 1], index=arr[[0, 1, 3]], dtype=exp_dtype, name="count")
+    tm.assert_series_equal(result, expected)
+    result = arr.value_counts(dropna=True)
+    expected = pd.Series([2, 1], index=arr[:2], dtype=exp_dtype, name="count")
+    tm.assert_series_equal(result, expected)
+def test_value_counts_with_normalize(dtype):
+    if dtype.na_value is np.nan:
+        exp_dtype = np.float64
+    elif dtype.storage == "pyarrow":
+        exp_dtype = "double[pyarrow]"
+    else:
+        exp_dtype = "Float64"
+    ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype)
+    result = ser.value_counts(normalize=True)
+    expected = pd.Series([2, 1], index=ser[:2], dtype=exp_dtype, name="proportion") / 3
+    tm.assert_series_equal(result, expected)
+@pytest.mark.parametrize(
+    "values, expected",
+    [
+        (["a", "b", "c"], np.array([False, False, False])),
+        (["a", "b", None], np.array([False, False, True])),
+    ],
+)
+def test_use_inf_as_na(values, expected, dtype):
+    # https://github.com/pandas-dev/pandas/issues/33655
+    values = pd.array(values, dtype=dtype)
+    msg = "use_inf_as_na option is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        with pd.option_context("mode.use_inf_as_na", True):
+            result = values.isna()
+            tm.assert_numpy_array_equal(result, expected)
+            result = pd.Series(values).isna()
+            expected = pd.Series(expected)
+            tm.assert_series_equal(result, expected)
+            result = pd.DataFrame(values).isna()
+            expected = pd.DataFrame(expected)
+            tm.assert_frame_equal(result, expected)
+def test_value_counts_sort_false(dtype):
+    if dtype.na_value is np.nan:
+        exp_dtype = "int64"
+    elif dtype.storage == "pyarrow":
+        exp_dtype = "int64[pyarrow]"
+    else:
+        exp_dtype = "Int64"
+    ser = pd.Series(["a", "b", "c", "b"], dtype=dtype)
+    result = ser.value_counts(sort=False)
+    expected = pd.Series([1, 2, 1], index=ser[:3], dtype=exp_dtype, name="count")
+    tm.assert_series_equal(result, expected)
+def test_memory_usage(dtype):
+    # GH 33963
+    if dtype.storage == "pyarrow":
+        pytest.skip(f"not applicable for {dtype.storage}")
+    series = pd.Series(["a", "b", "c"], dtype=dtype)
+    assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True)
+@pytest.mark.parametrize("float_dtype", [np.float16, np.float32, np.float64])
+def test_astype_from_float_dtype(float_dtype, dtype):
+    # https://github.com/pandas-dev/pandas/issues/36451
+    ser = pd.Series([0.1], dtype=float_dtype)
+    result = ser.astype(dtype)
+    expected = pd.Series(["0.1"], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+def test_to_numpy_returns_pdna_default(dtype):
+    arr = pd.array(["a", pd.NA, "b"], dtype=dtype)
+    result = np.array(arr)
+    expected = np.array(["a", dtype.na_value, "b"], dtype=object)
+    tm.assert_numpy_array_equal(result, expected)
+def test_to_numpy_na_value(dtype, nulls_fixture):
+    na_value = nulls_fixture
+    arr = pd.array(["a", pd.NA, "b"], dtype=dtype)
+    result = arr.to_numpy(na_value=na_value)
+    expected = np.array(["a", na_value, "b"], dtype=object)
+    tm.assert_numpy_array_equal(result, expected)
+def test_isin(dtype, fixed_now_ts):
+    s = pd.Series(["a", "b", None], dtype=dtype)
+    result = s.isin(["a", "c"])
+    expected = pd.Series([True, False, False])
+    tm.assert_series_equal(result, expected)
+    result = s.isin(["a", pd.NA])
+    expected = pd.Series([True, False, True])
+    tm.assert_series_equal(result, expected)
+    result = s.isin([])
+    expected = pd.Series([False, False, False])
+    tm.assert_series_equal(result, expected)
+    result = s.isin(["a", fixed_now_ts])
+    expected = pd.Series([True, False, False])
+    tm.assert_series_equal(result, expected)
+    result = s.isin([fixed_now_ts])
+    expected = pd.Series([False, False, False])
+    tm.assert_series_equal(result, expected)
+def test_isin_string_array(dtype, dtype2):
+    s = pd.Series(["a", "b", None], dtype=dtype)
+    result = s.isin(pd.array(["a", "c"], dtype=dtype2))
+    expected = pd.Series([True, False, False])
+    tm.assert_series_equal(result, expected)
+    result = s.isin(pd.array(["a", None], dtype=dtype2))
+    expected = pd.Series([True, False, True])
+    tm.assert_series_equal(result, expected)
+def test_isin_arrow_string_array(dtype):
+    pa = pytest.importorskip("pyarrow")
+    s = pd.Series(["a", "b", None], dtype=dtype)
+    result = s.isin(pd.array(["a", "c"], dtype=pd.ArrowDtype(pa.string())))
+    expected = pd.Series([True, False, False])
+    tm.assert_series_equal(result, expected)
+    result = s.isin(pd.array(["a", None], dtype=pd.ArrowDtype(pa.string())))
+    expected = pd.Series([True, False, True])
+    tm.assert_series_equal(result, expected)
+def test_setitem_scalar_with_mask_validation(dtype):
+    # https://github.com/pandas-dev/pandas/issues/47628
+    # setting None with a boolean mask (through _putmaks) should still result
+    # in pd.NA values in the underlying array
+    ser = pd.Series(["a", "b", "c"], dtype=dtype)
+    mask = np.array([False, True, False])
+    ser[mask] = None
+    assert ser.array[1] is ser.dtype.na_value
+    # for other non-string we should also raise an error
+    ser = pd.Series(["a", "b", "c"], dtype=dtype)
+    msg = "Invalid value '1' for dtype 'str"
+    with pytest.raises(TypeError, match=msg):
+        ser[mask] = 1
+def test_from_numpy_str(dtype):
+    vals = ["a", "b", "c"]
+    arr = np.array(vals, dtype=np.str_)
+    result = pd.array(arr, dtype=dtype)
+    expected = pd.array(vals, dtype=dtype)
+    tm.assert_extension_array_equal(result, expected)
+def test_tolist(dtype):
+    vals = ["a", "b", "c"]
+    arr = pd.array(vals, dtype=dtype)
+    result = arr.tolist()
+    expected = vals
+    tm.assert_equal(result, expected)
+@pytest.mark.parametrize("box", [pd.Series, pd.array])
+def test_numpy_array_ufunc(dtype, box):
+    arr = box(["a", "bb", "ccc"], dtype=dtype)
+    # custom ufunc that works with string (object) input -> returning numeric
+    str_len_ufunc = np.frompyfunc(lambda x: len(x), 1, 1)
+    result = str_len_ufunc(arr)
+    expected_cls = pd.Series if box is pd.Series else np.array
+    # TODO we should infer int64 dtype here?
+    expected = expected_cls([1, 2, 3], dtype=object)
+    tm.assert_equal(result, expected)
+    # custom ufunc returning strings
+    str_multiply_ufunc = np.frompyfunc(lambda x: x * 2, 1, 1)
+    result = str_multiply_ufunc(arr)
+    expected = box(["aa", "bbbb", "cccccc"], dtype=dtype)
+    if dtype.storage == "pyarrow":
+        # TODO ArrowStringArray should also preserve the class / dtype
+        if box is pd.array:
+            expected = np.array(["aa", "bbbb", "cccccc"], dtype=object)
+        else:
+            # not specifying the dtype because the exact dtype is not yet preserved
+            expected = pd.Series(["aa", "bbbb", "cccccc"])
+    tm.assert_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/io/json/__init__.py ADDED Viewed

File without changes

py311/lib/python3.11/site-packages/pandas/tests/io/json/conftest.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import pytest
+@pytest.fixture(params=["split", "records", "index", "columns", "values"])
+def orient(request):
+    """
+    Fixture for orients excluding the table format.
+    """
+    return request.param

py311/lib/python3.11/site-packages/pandas/tests/io/json/test_compression.py ADDED Viewed

	@@ -0,0 +1,130 @@

+from io import (
+    BytesIO,
+    StringIO,
+)
+import pytest
+import pandas.util._test_decorators as td
+import pandas as pd
+import pandas._testing as tm
+def test_compression_roundtrip(compression):
+    df = pd.DataFrame(
+        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
+        index=["A", "B"],
+        columns=["X", "Y", "Z"],
+    )
+    with tm.ensure_clean() as path:
+        df.to_json(path, compression=compression)
+        tm.assert_frame_equal(df, pd.read_json(path, compression=compression))
+        # explicitly ensure file was compressed.
+        with tm.decompress_file(path, compression) as fh:
+            result = fh.read().decode("utf8")
+            data = StringIO(result)
+        tm.assert_frame_equal(df, pd.read_json(data))
+def test_read_zipped_json(datapath):
+    uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
+    uncompressed_df = pd.read_json(uncompressed_path)
+    compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
+    compressed_df = pd.read_json(compressed_path, compression="zip")
+    tm.assert_frame_equal(uncompressed_df, compressed_df)
+@td.skip_if_not_us_locale
+@pytest.mark.single_cpu
+def test_with_s3_url(compression, s3_public_bucket, s3so):
+    # Bucket created in tests/io/conftest.py
+    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
+    with tm.ensure_clean() as path:
+        df.to_json(path, compression=compression)
+        with open(path, "rb") as f:
+            s3_public_bucket.put_object(Key="test-1", Body=f)
+    roundtripped_df = pd.read_json(
+        f"s3://{s3_public_bucket.name}/test-1",
+        compression=compression,
+        storage_options=s3so,
+    )
+    tm.assert_frame_equal(df, roundtripped_df)
+def test_lines_with_compression(compression):
+    with tm.ensure_clean() as path:
+        df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
+        df.to_json(path, orient="records", lines=True, compression=compression)
+        roundtripped_df = pd.read_json(path, lines=True, compression=compression)
+        tm.assert_frame_equal(df, roundtripped_df)
+def test_chunksize_with_compression(compression):
+    with tm.ensure_clean() as path:
+        df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
+        df.to_json(path, orient="records", lines=True, compression=compression)
+        with pd.read_json(
+            path, lines=True, chunksize=1, compression=compression
+        ) as res:
+            roundtripped_df = pd.concat(res)
+        tm.assert_frame_equal(df, roundtripped_df)
+def test_write_unsupported_compression_type():
+    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
+    with tm.ensure_clean() as path:
+        msg = "Unrecognized compression type: unsupported"
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(path, compression="unsupported")
+def test_read_unsupported_compression_type():
+    with tm.ensure_clean() as path:
+        msg = "Unrecognized compression type: unsupported"
+        with pytest.raises(ValueError, match=msg):
+            pd.read_json(path, compression="unsupported")
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
+@pytest.mark.parametrize("to_infer", [True, False])
+@pytest.mark.parametrize("read_infer", [True, False])
+def test_to_json_compression(
+    compression_only, read_infer, to_infer, compression_to_extension, infer_string
+):
+    with pd.option_context("future.infer_string", infer_string):
+        # see gh-15008
+        compression = compression_only
+        # We'll complete file extension subsequently.
+        filename = "test."
+        filename += compression_to_extension[compression]
+        df = pd.DataFrame({"A": [1]})
+        to_compression = "infer" if to_infer else compression
+        read_compression = "infer" if read_infer else compression
+        with tm.ensure_clean(filename) as path:
+            df.to_json(path, compression=to_compression)
+            result = pd.read_json(path, compression=read_compression)
+            tm.assert_frame_equal(result, df)
+def test_to_json_compression_mode(compression):
+    # GH 39985 (read_json does not support user-provided binary files)
+    expected = pd.DataFrame({"A": [1]})
+    with BytesIO() as buffer:
+        expected.to_json(buffer, compression=compression)
+        # df = pd.read_json(buffer, compression=compression)
+        # tm.assert_frame_equal(expected, df)

py311/lib/python3.11/site-packages/pandas/tests/io/json/test_deprecated_kwargs.py ADDED Viewed

	@@ -0,0 +1,21 @@

+"""
+Tests for the deprecated keyword arguments for `read_json`.
+"""
+from io import StringIO
+import pandas as pd
+import pandas._testing as tm
+from pandas.io.json import read_json
+def test_good_kwargs():
+    df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2])
+    with tm.assert_produces_warning(None):
+        data1 = StringIO(df.to_json(orient="split"))
+        tm.assert_frame_equal(df, read_json(data1, orient="split"))
+        data2 = StringIO(df.to_json(orient="columns"))
+        tm.assert_frame_equal(df, read_json(data2, orient="columns"))
+        data3 = StringIO(df.to_json(orient="index"))
+        tm.assert_frame_equal(df, read_json(data3, orient="index"))

py311/lib/python3.11/site-packages/pandas/tests/io/json/test_json_table_schema_ext_dtype.py ADDED Viewed

	@@ -0,0 +1,317 @@

+"""Tests for ExtensionDtype Table Schema integration."""
+from collections import OrderedDict
+import datetime as dt
+import decimal
+from io import StringIO
+import json
+import pytest
+from pandas import (
+    NA,
+    DataFrame,
+    Index,
+    array,
+    read_json,
+)
+import pandas._testing as tm
+from pandas.core.arrays.integer import Int64Dtype
+from pandas.core.arrays.string_ import StringDtype
+from pandas.core.series import Series
+from pandas.tests.extension.date import (
+    DateArray,
+    DateDtype,
+)
+from pandas.tests.extension.decimal.array import (
+    DecimalArray,
+    DecimalDtype,
+)
+from pandas.io.json._table_schema import (
+    as_json_table_type,
+    build_table_schema,
+)
+class TestBuildSchema:
+    def test_build_table_schema(self):
+        df = DataFrame(
+            {
+                "A": DateArray([dt.date(2021, 10, 10)]),
+                "B": DecimalArray([decimal.Decimal(10)]),
+                "C": array(["pandas"], dtype="string"),
+                "D": array([10], dtype="Int64"),
+            }
+        )
+        result = build_table_schema(df, version=False)
+        expected = {
+            "fields": [
+                {"name": "index", "type": "integer"},
+                {"name": "A", "type": "any", "extDtype": "DateDtype"},
+                {"name": "B", "type": "number", "extDtype": "decimal"},
+                {"name": "C", "type": "string", "extDtype": "string"},
+                {"name": "D", "type": "integer", "extDtype": "Int64"},
+            ],
+            "primaryKey": ["index"],
+        }
+        assert result == expected
+        result = build_table_schema(df)
+        assert "pandas_version" in result
+class TestTableSchemaType:
+    @pytest.mark.parametrize(
+        "date_data",
+        [
+            DateArray([dt.date(2021, 10, 10)]),
+            DateArray(dt.date(2021, 10, 10)),
+            Series(DateArray(dt.date(2021, 10, 10))),
+        ],
+    )
+    def test_as_json_table_type_ext_date_array_dtype(self, date_data):
+        assert as_json_table_type(date_data.dtype) == "any"
+    def test_as_json_table_type_ext_date_dtype(self):
+        assert as_json_table_type(DateDtype()) == "any"
+    @pytest.mark.parametrize(
+        "decimal_data",
+        [
+            DecimalArray([decimal.Decimal(10)]),
+            Series(DecimalArray([decimal.Decimal(10)])),
+        ],
+    )
+    def test_as_json_table_type_ext_decimal_array_dtype(self, decimal_data):
+        assert as_json_table_type(decimal_data.dtype) == "number"
+    def test_as_json_table_type_ext_decimal_dtype(self):
+        assert as_json_table_type(DecimalDtype()) == "number"
+    @pytest.mark.parametrize(
+        "string_data",
+        [
+            array(["pandas"], dtype="string"),
+            Series(array(["pandas"], dtype="string")),
+        ],
+    )
+    def test_as_json_table_type_ext_string_array_dtype(self, string_data):
+        assert as_json_table_type(string_data.dtype) == "string"
+    def test_as_json_table_type_ext_string_dtype(self):
+        assert as_json_table_type(StringDtype()) == "string"
+    @pytest.mark.parametrize(
+        "integer_data",
+        [
+            array([10], dtype="Int64"),
+            Series(array([10], dtype="Int64")),
+        ],
+    )
+    def test_as_json_table_type_ext_integer_array_dtype(self, integer_data):
+        assert as_json_table_type(integer_data.dtype) == "integer"
+    def test_as_json_table_type_ext_integer_dtype(self):
+        assert as_json_table_type(Int64Dtype()) == "integer"
+class TestTableOrient:
+    @pytest.fixture
+    def da(self):
+        return DateArray([dt.date(2021, 10, 10)])
+    @pytest.fixture
+    def dc(self):
+        return DecimalArray([decimal.Decimal(10)])
+    @pytest.fixture
+    def sa(self):
+        return array(["pandas"], dtype="string")
+    @pytest.fixture
+    def ia(self):
+        return array([10], dtype="Int64")
+    @pytest.fixture
+    def df(self, da, dc, sa, ia):
+        return DataFrame(
+            {
+                "A": da,
+                "B": dc,
+                "C": sa,
+                "D": ia,
+            }
+        )
+    def test_build_date_series(self, da):
+        s = Series(da, name="a")
+        s.index.name = "id"
+        result = s.to_json(orient="table", date_format="iso")
+        result = json.loads(result, object_pairs_hook=OrderedDict)
+        assert "pandas_version" in result["schema"]
+        result["schema"].pop("pandas_version")
+        fields = [
+            {"name": "id", "type": "integer"},
+            {"name": "a", "type": "any", "extDtype": "DateDtype"},
+        ]
+        schema = {"fields": fields, "primaryKey": ["id"]}
+        expected = OrderedDict(
+            [
+                ("schema", schema),
+                ("data", [OrderedDict([("id", 0), ("a", "2021-10-10T00:00:00.000")])]),
+            ]
+        )
+        assert result == expected
+    def test_build_decimal_series(self, dc):
+        s = Series(dc, name="a")
+        s.index.name = "id"
+        result = s.to_json(orient="table", date_format="iso")
+        result = json.loads(result, object_pairs_hook=OrderedDict)
+        assert "pandas_version" in result["schema"]
+        result["schema"].pop("pandas_version")
+        fields = [
+            {"name": "id", "type": "integer"},
+            {"name": "a", "type": "number", "extDtype": "decimal"},
+        ]
+        schema = {"fields": fields, "primaryKey": ["id"]}
+        expected = OrderedDict(
+            [
+                ("schema", schema),
+                ("data", [OrderedDict([("id", 0), ("a", 10.0)])]),
+            ]
+        )
+        assert result == expected
+    def test_build_string_series(self, sa):
+        s = Series(sa, name="a")
+        s.index.name = "id"
+        result = s.to_json(orient="table", date_format="iso")
+        result = json.loads(result, object_pairs_hook=OrderedDict)
+        assert "pandas_version" in result["schema"]
+        result["schema"].pop("pandas_version")
+        fields = [
+            {"name": "id", "type": "integer"},
+            {"name": "a", "type": "string", "extDtype": "string"},
+        ]
+        schema = {"fields": fields, "primaryKey": ["id"]}
+        expected = OrderedDict(
+            [
+                ("schema", schema),
+                ("data", [OrderedDict([("id", 0), ("a", "pandas")])]),
+            ]
+        )
+        assert result == expected
+    def test_build_int64_series(self, ia):
+        s = Series(ia, name="a")
+        s.index.name = "id"
+        result = s.to_json(orient="table", date_format="iso")
+        result = json.loads(result, object_pairs_hook=OrderedDict)
+        assert "pandas_version" in result["schema"]
+        result["schema"].pop("pandas_version")
+        fields = [
+            {"name": "id", "type": "integer"},
+            {"name": "a", "type": "integer", "extDtype": "Int64"},
+        ]
+        schema = {"fields": fields, "primaryKey": ["id"]}
+        expected = OrderedDict(
+            [
+                ("schema", schema),
+                ("data", [OrderedDict([("id", 0), ("a", 10)])]),
+            ]
+        )
+        assert result == expected
+    def test_to_json(self, df):
+        df = df.copy()
+        df.index.name = "idx"
+        result = df.to_json(orient="table", date_format="iso")
+        result = json.loads(result, object_pairs_hook=OrderedDict)
+        assert "pandas_version" in result["schema"]
+        result["schema"].pop("pandas_version")
+        fields = [
+            OrderedDict({"name": "idx", "type": "integer"}),
+            OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}),
+            OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}),
+            OrderedDict({"name": "C", "type": "string", "extDtype": "string"}),
+            OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}),
+        ]
+        schema = OrderedDict({"fields": fields, "primaryKey": ["idx"]})
+        data = [
+            OrderedDict(
+                [
+                    ("idx", 0),
+                    ("A", "2021-10-10T00:00:00.000"),
+                    ("B", 10.0),
+                    ("C", "pandas"),
+                    ("D", 10),
+                ]
+            )
+        ]
+        expected = OrderedDict([("schema", schema), ("data", data)])
+        assert result == expected
+    def test_json_ext_dtype_reading_roundtrip(self):
+        # GH#40255
+        df = DataFrame(
+            {
+                "a": Series([2, NA], dtype="Int64"),
+                "b": Series([1.5, NA], dtype="Float64"),
+                "c": Series([True, NA], dtype="boolean"),
+            },
+            index=Index([1, NA], dtype="Int64"),
+        )
+        expected = df.copy()
+        data_json = df.to_json(orient="table", indent=4)
+        result = read_json(StringIO(data_json), orient="table")
+        tm.assert_frame_equal(result, expected)
+    def test_json_ext_dtype_reading(self):
+        # GH#40255
+        data_json = """{
+            "schema":{
+                "fields":[
+                    {
+                        "name":"a",
+                        "type":"integer",
+                        "extDtype":"Int64"
+                    }
+                ],
+            },
+            "data":[
+                {
+                    "a":2
+                },
+                {
+                    "a":null
+                }
+            ]
+        }"""
+        result = read_json(StringIO(data_json), orient="table")
+        expected = DataFrame({"a": Series([2, NA], dtype="Int64")})
+        tm.assert_frame_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/io/json/test_normalize.py ADDED Viewed

	@@ -0,0 +1,907 @@

+import json
+import numpy as np
+import pytest
+from pandas import (
+    DataFrame,
+    Index,
+    Series,
+    json_normalize,
+)
+import pandas._testing as tm
+from pandas.io.json._normalize import nested_to_record
+@pytest.fixture
+def deep_nested():
+    # deeply nested data
+    return [
+        {
+            "country": "USA",
+            "states": [
+                {
+                    "name": "California",
+                    "cities": [
+                        {"name": "San Francisco", "pop": 12345},
+                        {"name": "Los Angeles", "pop": 12346},
+                    ],
+                },
+                {
+                    "name": "Ohio",
+                    "cities": [
+                        {"name": "Columbus", "pop": 1234},
+                        {"name": "Cleveland", "pop": 1236},
+                    ],
+                },
+            ],
+        },
+        {
+            "country": "Germany",
+            "states": [
+                {"name": "Bayern", "cities": [{"name": "Munich", "pop": 12347}]},
+                {
+                    "name": "Nordrhein-Westfalen",
+                    "cities": [
+                        {"name": "Duesseldorf", "pop": 1238},
+                        {"name": "Koeln", "pop": 1239},
+                    ],
+                },
+            ],
+        },
+    ]
+@pytest.fixture
+def state_data():
+    return [
+        {
+            "counties": [
+                {"name": "Dade", "population": 12345},
+                {"name": "Broward", "population": 40000},
+                {"name": "Palm Beach", "population": 60000},
+            ],
+            "info": {"governor": "Rick Scott"},
+            "shortname": "FL",
+            "state": "Florida",
+        },
+        {
+            "counties": [
+                {"name": "Summit", "population": 1234},
+                {"name": "Cuyahoga", "population": 1337},
+            ],
+            "info": {"governor": "John Kasich"},
+            "shortname": "OH",
+            "state": "Ohio",
+        },
+    ]
+@pytest.fixture
+def author_missing_data():
+    return [
+        {"info": None},
+        {
+            "info": {"created_at": "11/08/1993", "last_updated": "26/05/2012"},
+            "author_name": {"first": "Jane", "last_name": "Doe"},
+        },
+    ]
+@pytest.fixture
+def missing_metadata():
+    return [
+        {
+            "name": "Alice",
+            "addresses": [
+                {
+                    "number": 9562,
+                    "street": "Morris St.",
+                    "city": "Massillon",
+                    "state": "OH",
+                    "zip": 44646,
+                }
+            ],
+            "previous_residences": {"cities": [{"city_name": "Foo York City"}]},
+        },
+        {
+            "addresses": [
+                {
+                    "number": 8449,
+                    "street": "Spring St.",
+                    "city": "Elizabethton",
+                    "state": "TN",
+                    "zip": 37643,
+                }
+            ],
+            "previous_residences": {"cities": [{"city_name": "Barmingham"}]},
+        },
+    ]
+@pytest.fixture
+def max_level_test_input_data():
+    """
+    input data to test json_normalize with max_level param
+    """
+    return [
+        {
+            "CreatedBy": {"Name": "User001"},
+            "Lookup": {
+                "TextField": "Some text",
+                "UserField": {"Id": "ID001", "Name": "Name001"},
+            },
+            "Image": {"a": "b"},
+        }
+    ]
+class TestJSONNormalize:
+    def test_simple_records(self):
+        recs = [
+            {"a": 1, "b": 2, "c": 3},
+            {"a": 4, "b": 5, "c": 6},
+            {"a": 7, "b": 8, "c": 9},
+            {"a": 10, "b": 11, "c": 12},
+        ]
+        result = json_normalize(recs)
+        expected = DataFrame(recs)
+        tm.assert_frame_equal(result, expected)
+    def test_simple_normalize(self, state_data):
+        result = json_normalize(state_data[0], "counties")
+        expected = DataFrame(state_data[0]["counties"])
+        tm.assert_frame_equal(result, expected)
+        result = json_normalize(state_data, "counties")
+        expected = []
+        for rec in state_data:
+            expected.extend(rec["counties"])
+        expected = DataFrame(expected)
+        tm.assert_frame_equal(result, expected)
+        result = json_normalize(state_data, "counties", meta="state")
+        expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2])
+        tm.assert_frame_equal(result, expected)
+    def test_fields_list_type_normalize(self):
+        parse_metadata_fields_list_type = [
+            {"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}}
+        ]
+        result = json_normalize(
+            parse_metadata_fields_list_type,
+            record_path=["values"],
+            meta=[["metadata", "listdata"]],
+        )
+        expected = DataFrame(
+            {0: [1, 2, 3], "metadata.listdata": [[1, 2], [1, 2], [1, 2]]}
+        )
+        tm.assert_frame_equal(result, expected)
+    def test_empty_array(self):
+        result = json_normalize([])
+        expected = DataFrame()
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize(
+        "data, record_path, exception_type",
+        [
+            ([{"a": 0}, {"a": 1}], None, None),
+            ({"a": [{"a": 0}, {"a": 1}]}, "a", None),
+            ('{"a": [{"a": 0}, {"a": 1}]}', None, NotImplementedError),
+            (None, None, NotImplementedError),
+        ],
+    )
+    def test_accepted_input(self, data, record_path, exception_type):
+        if exception_type is not None:
+            with pytest.raises(exception_type, match=""):
+                json_normalize(data, record_path=record_path)
+        else:
+            result = json_normalize(data, record_path=record_path)
+            expected = DataFrame([0, 1], columns=["a"])
+            tm.assert_frame_equal(result, expected)
+    def test_simple_normalize_with_separator(self, deep_nested):
+        # GH 14883
+        result = json_normalize({"A": {"A": 1, "B": 2}})
+        expected = DataFrame([[1, 2]], columns=["A.A", "A.B"])
+        tm.assert_frame_equal(result.reindex_like(expected), expected)
+        result = json_normalize({"A": {"A": 1, "B": 2}}, sep="_")
+        expected = DataFrame([[1, 2]], columns=["A_A", "A_B"])
+        tm.assert_frame_equal(result.reindex_like(expected), expected)
+        result = json_normalize({"A": {"A": 1, "B": 2}}, sep="\u03c3")
+        expected = DataFrame([[1, 2]], columns=["A\u03c3A", "A\u03c3B"])
+        tm.assert_frame_equal(result.reindex_like(expected), expected)
+        result = json_normalize(
+            deep_nested,
+            ["states", "cities"],
+            meta=["country", ["states", "name"]],
+            sep="_",
+        )
+        expected = Index(["name", "pop", "country", "states_name"]).sort_values()
+        assert result.columns.sort_values().equals(expected)
+    def test_normalize_with_multichar_separator(self):
+        # GH #43831
+        data = {"a": [1, 2], "b": {"b_1": 2, "b_2": (3, 4)}}
+        result = json_normalize(data, sep="__")
+        expected = DataFrame([[[1, 2], 2, (3, 4)]], columns=["a", "b__b_1", "b__b_2"])
+        tm.assert_frame_equal(result, expected)
+    def test_value_array_record_prefix(self):
+        # GH 21536
+        result = json_normalize({"A": [1, 2]}, "A", record_prefix="Prefix.")
+        expected = DataFrame([[1], [2]], columns=["Prefix.0"])
+        tm.assert_frame_equal(result, expected)
+    def test_nested_object_record_path(self):
+        # GH 22706
+        data = {
+            "state": "Florida",
+            "info": {
+                "governor": "Rick Scott",
+                "counties": [
+                    {"name": "Dade", "population": 12345},
+                    {"name": "Broward", "population": 40000},
+                    {"name": "Palm Beach", "population": 60000},
+                ],
+            },
+        }
+        result = json_normalize(data, record_path=["info", "counties"])
+        expected = DataFrame(
+            [["Dade", 12345], ["Broward", 40000], ["Palm Beach", 60000]],
+            columns=["name", "population"],
+        )
+        tm.assert_frame_equal(result, expected)
+    def test_more_deeply_nested(self, deep_nested):
+        result = json_normalize(
+            deep_nested, ["states", "cities"], meta=["country", ["states", "name"]]
+        )
+        ex_data = {
+            "country": ["USA"] * 4 + ["Germany"] * 3,
+            "states.name": [
+                "California",
+                "California",
+                "Ohio",
+                "Ohio",
+                "Bayern",
+                "Nordrhein-Westfalen",
+                "Nordrhein-Westfalen",
+            ],
+            "name": [
+                "San Francisco",
+                "Los Angeles",
+                "Columbus",
+                "Cleveland",
+                "Munich",
+                "Duesseldorf",
+                "Koeln",
+            ],
+            "pop": [12345, 12346, 1234, 1236, 12347, 1238, 1239],
+        }
+        expected = DataFrame(ex_data, columns=result.columns)
+        tm.assert_frame_equal(result, expected)
+    def test_shallow_nested(self):
+        data = [
+            {
+                "state": "Florida",
+                "shortname": "FL",
+                "info": {"governor": "Rick Scott"},
+                "counties": [
+                    {"name": "Dade", "population": 12345},
+                    {"name": "Broward", "population": 40000},
+                    {"name": "Palm Beach", "population": 60000},
+                ],
+            },
+            {
+                "state": "Ohio",
+                "shortname": "OH",
+                "info": {"governor": "John Kasich"},
+                "counties": [
+                    {"name": "Summit", "population": 1234},
+                    {"name": "Cuyahoga", "population": 1337},
+                ],
+            },
+        ]
+        result = json_normalize(
+            data, "counties", ["state", "shortname", ["info", "governor"]]
+        )
+        ex_data = {
+            "name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"],
+            "state": ["Florida"] * 3 + ["Ohio"] * 2,
+            "shortname": ["FL", "FL", "FL", "OH", "OH"],
+            "info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2,
+            "population": [12345, 40000, 60000, 1234, 1337],
+        }
+        expected = DataFrame(ex_data, columns=result.columns)
+        tm.assert_frame_equal(result, expected)
+    def test_nested_meta_path_with_nested_record_path(self, state_data):
+        # GH 27220
+        result = json_normalize(
+            data=state_data,
+            record_path=["counties"],
+            meta=["state", "shortname", ["info", "governor"]],
+            errors="ignore",
+        )
+        ex_data = {
+            "name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"],
+            "population": [12345, 40000, 60000, 1234, 1337],
+            "state": ["Florida"] * 3 + ["Ohio"] * 2,
+            "shortname": ["FL"] * 3 + ["OH"] * 2,
+            "info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2,
+        }
+        expected = DataFrame(ex_data)
+        tm.assert_frame_equal(result, expected)
+    def test_meta_name_conflict(self):
+        data = [
+            {
+                "foo": "hello",
+                "bar": "there",
+                "data": [
+                    {"foo": "something", "bar": "else"},
+                    {"foo": "something2", "bar": "else2"},
+                ],
+            }
+        ]
+        msg = r"Conflicting metadata name (foo|bar), need distinguishing prefix"
+        with pytest.raises(ValueError, match=msg):
+            json_normalize(data, "data", meta=["foo", "bar"])
+        result = json_normalize(data, "data", meta=["foo", "bar"], meta_prefix="meta")
+        for val in ["metafoo", "metabar", "foo", "bar"]:
+            assert val in result
+    def test_meta_parameter_not_modified(self):
+        # GH 18610
+        data = [
+            {
+                "foo": "hello",
+                "bar": "there",
+                "data": [
+                    {"foo": "something", "bar": "else"},
+                    {"foo": "something2", "bar": "else2"},
+                ],
+            }
+        ]
+        COLUMNS = ["foo", "bar"]
+        result = json_normalize(data, "data", meta=COLUMNS, meta_prefix="meta")
+        assert COLUMNS == ["foo", "bar"]
+        for val in ["metafoo", "metabar", "foo", "bar"]:
+            assert val in result
+    def test_record_prefix(self, state_data):
+        result = json_normalize(state_data[0], "counties")
+        expected = DataFrame(state_data[0]["counties"])
+        tm.assert_frame_equal(result, expected)
+        result = json_normalize(
+            state_data, "counties", meta="state", record_prefix="county_"
+        )
+        expected = []
+        for rec in state_data:
+            expected.extend(rec["counties"])
+        expected = DataFrame(expected)
+        expected = expected.rename(columns=lambda x: "county_" + x)
+        expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2])
+        tm.assert_frame_equal(result, expected)
+    def test_non_ascii_key(self):
+        testjson = (
+            b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
+            b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
+        ).decode("utf8")
+        testdata = {
+            b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1],
+            "sub.A": [1, 3],
+            "sub.B": [2, 4],
+        }
+        expected = DataFrame(testdata)
+        result = json_normalize(json.loads(testjson))
+        tm.assert_frame_equal(result, expected)
+    def test_missing_field(self, author_missing_data):
+        """Normalize the ``author_missing_data`` fixture and compare the columns.
+
+        The expected frame has an all-NaN first row and a second row holding
+        the flattened ``info.*`` and ``author_name.*`` values.
+        """
+        # GH20030:
+        result = json_normalize(author_missing_data)
+        ex_data = [
+            {
+                "info": np.nan,
+                "info.created_at": np.nan,
+                "info.last_updated": np.nan,
+                "author_name.first": np.nan,
+                "author_name.last_name": np.nan,
+            },
+            {
+                # "info" itself is None here while its flattened children are set.
+                "info": None,
+                "info.created_at": "11/08/1993",
+                "info.last_updated": "26/05/2012",
+                "author_name.first": "Jane",
+                "author_name.last_name": "Doe",
+            },
+        ]
+        expected = DataFrame(ex_data)
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize(
+        "max_level,expected",
+        [
+            # max_level=0: the nested "UserField" dict of each record is kept as-is.
+            (
+                0,
+                [
+                    {
+                        "TextField": "Some text",
+                        "UserField": {"Id": "ID001", "Name": "Name001"},
+                        "CreatedBy": {"Name": "User001"},
+                        "Image": {"a": "b"},
+                    },
+                    {
+                        "TextField": "Some text",
+                        "UserField": {"Id": "ID001", "Name": "Name001"},
+                        "CreatedBy": {"Name": "User001"},
+                        "Image": {"a": "b"},
+                    },
+                ],
+            ),
+            # max_level=1: "UserField" is flattened into dotted columns, while the
+            # "CreatedBy" and "Image" meta values are still dicts.
+            (
+                1,
+                [
+                    {
+                        "TextField": "Some text",
+                        "UserField.Id": "ID001",
+                        "UserField.Name": "Name001",
+                        "CreatedBy": {"Name": "User001"},
+                        "Image": {"a": "b"},
+                    },
+                    {
+                        "TextField": "Some text",
+                        "UserField.Id": "ID001",
+                        "UserField.Name": "Name001",
+                        "CreatedBy": {"Name": "User001"},
+                        "Image": {"a": "b"},
+                    },
+                ],
+            ),
+        ],
+    )
+    def test_max_level_with_records_path(self, max_level, expected):
+        """``max_level`` applies to records pulled out through ``record_path``."""
+        # GH23843: Enhanced JSON normalize
+        test_input = [
+            {
+                "CreatedBy": {"Name": "User001"},
+                "Lookup": [
+                    {
+                        "TextField": "Some text",
+                        "UserField": {"Id": "ID001", "Name": "Name001"},
+                    },
+                    {
+                        "TextField": "Some text",
+                        "UserField": {"Id": "ID001", "Name": "Name001"},
+                    },
+                ],
+                "Image": {"a": "b"},
+                # "tags" is neither a record path nor a meta field in the call below.
+                "tags": [
+                    {"foo": "something", "bar": "else"},
+                    {"foo": "something2", "bar": "else2"},
+                ],
+            }
+        ]
+        result = json_normalize(
+            test_input,
+            record_path=["Lookup"],
+            meta=[["CreatedBy"], ["Image"]],
+            max_level=max_level,
+        )
+        # Build the expected frame with the result's own column labels and order.
+        expected_df = DataFrame(data=expected, columns=result.columns.values)
+        tm.assert_equal(expected_df, result)
+    def test_nested_flattening_consistent(self):
+        # see gh-21537
+        df1 = json_normalize([{"A": {"B": 1}}])
+        df2 = json_normalize({"dummy": [{"A": {"B": 1}}]}, "dummy")
+        # They should be the same.
+        tm.assert_frame_equal(df1, df2)
+    def test_nonetype_record_path(self, nulls_fixture):
+        # see gh-30148
+        # should not raise TypeError
+        result = json_normalize(
+            [
+                {"state": "Texas", "info": nulls_fixture},
+                {"state": "Florida", "info": [{"i": 2}]},
+            ],
+            record_path=["info"],
+        )
+        expected = DataFrame({"i": 2}, index=[0])
+        tm.assert_equal(result, expected)
+    @pytest.mark.parametrize("value", ["false", "true", "{}", "1", '"text"'])
+    def test_non_list_record_path_errors(self, value):
+        # see gh-30148, GH 26284
+        parsed_value = json.loads(value)
+        test_input = {"state": "Texas", "info": parsed_value}
+        test_path = "info"
+        msg = (
+            f"{test_input} has non list value {parsed_value} for path {test_path}. "
+            "Must be list or null."
+        )
+        with pytest.raises(TypeError, match=msg):
+            json_normalize([test_input], record_path=[test_path])
+    def test_meta_non_iterable(self):
+        # GH 31507
+        data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]"""
+        result = json_normalize(json.loads(data), record_path=["data"], meta=["id"])
+        expected = DataFrame(
+            {"one": [1], "two": [2], "id": np.array([99], dtype=object)}
+        )
+        tm.assert_frame_equal(result, expected)
+    def test_generator(self, state_data):
+        # GH35923 Fix pd.json_normalize to not skip the first element of a
+        # generator input
+        def generator_data():
+            yield from state_data[0]["counties"]
+        result = json_normalize(generator_data())
+        expected = DataFrame(state_data[0]["counties"])
+        tm.assert_frame_equal(result, expected)
+    def test_top_column_with_leading_underscore(self):
+        # 49861
+        data = {"_id": {"a1": 10, "l2": {"l3": 0}}, "gg": 4}
+        result = json_normalize(data, sep="_")
+        expected = DataFrame([[4, 10, 0]], columns=["gg", "_id_a1", "_id_l2_l3"])
+        tm.assert_frame_equal(result, expected)
+class TestNestedToRecord:
+    def test_flat_stays_flat(self):
+        recs = [{"flat1": 1, "flat2": 2}, {"flat3": 3, "flat2": 4}]
+        result = nested_to_record(recs)
+        expected = recs
+        assert result == expected
+    def test_one_level_deep_flattens(self):
+        data = {"flat1": 1, "dict1": {"c": 1, "d": 2}}
+        result = nested_to_record(data)
+        expected = {"dict1.c": 1, "dict1.d": 2, "flat1": 1}
+        assert result == expected
+    def test_nested_flattens(self):
+        data = {
+            "flat1": 1,
+            "dict1": {"c": 1, "d": 2},
+            "nested": {"e": {"c": 1, "d": 2}, "d": 2},
+        }
+        result = nested_to_record(data)
+        expected = {
+            "dict1.c": 1,
+            "dict1.d": 2,
+            "flat1": 1,
+            "nested.d": 2,
+            "nested.e.c": 1,
+            "nested.e.d": 2,
+        }
+        assert result == expected
+    def test_json_normalize_errors(self, missing_metadata):
+        """With ``errors="raise"`` a missing meta key surfaces as a KeyError."""
+        # GH14583:
+        # If meta keys are not always present a new option to set
+        # errors='ignore' has been implemented
+        msg = (
+            "Key 'name' not found. To replace missing values of "
+            "'name' with np.nan, pass in errors='ignore'"
+        )
+        # The message is used as a regex by ``match``.
+        with pytest.raises(KeyError, match=msg):
+            json_normalize(
+                data=missing_metadata,
+                record_path="addresses",
+                meta="name",
+                errors="raise",
+            )
+    def test_missing_meta(self, missing_metadata):
+        """With ``errors="ignore"`` a missing meta value is reported as NaN."""
+        # GH25468
+        # If metadata is nullable with errors set to ignore, the null values
+        # should be numpy.nan values
+        result = json_normalize(
+            data=missing_metadata, record_path="addresses", meta="name", errors="ignore"
+        )
+        # The second expected row has NaN in the "name" meta column.
+        ex_data = [
+            [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"],
+            [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan],
+        ]
+        columns = ["number", "street", "city", "state", "zip", "name"]
+        expected = DataFrame(ex_data, columns=columns)
+        tm.assert_frame_equal(result, expected)
+    def test_missing_nested_meta(self):
+        """A null parent on a nested meta path gives NaN or raises, per ``errors``."""
+        # GH44312
+        # If errors="ignore" and nested metadata is null, we should return nan
+        data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]}
+        result = json_normalize(
+            data,
+            record_path="value",
+            meta=["meta", ["nested_meta", "leaf"]],
+            errors="ignore",
+        )
+        ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]]
+        columns = ["rec", "meta", "nested_meta.leaf"]
+        # The all-NaN nested meta column is expected with object dtype.
+        expected = DataFrame(ex_data, columns=columns).astype(
+            {"nested_meta.leaf": object}
+        )
+        tm.assert_frame_equal(result, expected)
+        # If errors="raise" and nested metadata is null, we should raise with the
+        # key of the first missing level
+        with pytest.raises(KeyError, match="'leaf' not found"):
+            json_normalize(
+                data,
+                record_path="value",
+                meta=["meta", ["nested_meta", "leaf"]],
+                errors="raise",
+            )
+    def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata):
+        """``errors="raise"`` raises KeyError with a two-level ``record_path``."""
+        # GH41876
+        # Ensure errors='raise' works as intended even when a record_path of length
+        # greater than one is passed in
+        msg = (
+            "Key 'name' not found. To replace missing values of "
+            "'name' with np.nan, pass in errors='ignore'"
+        )
+        with pytest.raises(KeyError, match=msg):
+            json_normalize(
+                data=missing_metadata,
+                record_path=["previous_residences", "cities"],
+                meta="name",
+                errors="raise",
+            )
+    def test_missing_meta_multilevel_record_path_errors_ignore(self, missing_metadata):
+        """``errors="ignore"`` yields NaN meta with a two-level ``record_path``."""
+        # GH41876
+        # Ensure errors='ignore' works as intended even when a record_path of length
+        # greater than one is passed in
+        result = json_normalize(
+            data=missing_metadata,
+            record_path=["previous_residences", "cities"],
+            meta="name",
+            errors="ignore",
+        )
+        # The second expected row has NaN for the "name" meta value.
+        ex_data = [
+            ["Foo York City", "Alice"],
+            ["Barmingham", np.nan],
+        ]
+        columns = ["city_name", "name"]
+        expected = DataFrame(ex_data, columns=columns)
+        tm.assert_frame_equal(result, expected)
+    def test_donot_drop_nonevalues(self):
+        """A top-level key whose value is None stays in the flattened record."""
+        # GH21356
+        data = [
+            {"info": None, "author_name": {"first": "Smith", "last_name": "Appleseed"}},
+            {
+                "info": {"created_at": "11/08/1993", "last_updated": "26/05/2012"},
+                "author_name": {"first": "Jane", "last_name": "Doe"},
+            },
+        ]
+        result = nested_to_record(data)
+        expected = [
+            {
+                # "info" was None in the input and is kept under its plain key.
+                "info": None,
+                "author_name.first": "Smith",
+                "author_name.last_name": "Appleseed",
+            },
+            {
+                "author_name.first": "Jane",
+                "author_name.last_name": "Doe",
+                "info.created_at": "11/08/1993",
+                "info.last_updated": "26/05/2012",
+            },
+        ]
+        assert result == expected
+    def test_nonetype_top_level_bottom_level(self):
+        """None values at the top and at the deepest level are both flattened."""
+        # GH21158: If inner level json has a key with a null value
+        # make sure it does not do a new_d.pop twice and except
+        data = {
+            "id": None,
+            "location": {
+                "country": {
+                    "state": {
+                        "id": None,
+                        # This key already contains the "." separator.
+                        "town.info": {
+                            "id": None,
+                            "region": None,
+                            "x": 49.151580810546875,
+                            "y": -33.148521423339844,
+                            "z": 27.572303771972656,
+                        },
+                    }
+                }
+            },
+        }
+        result = nested_to_record(data)
+        expected = {
+            "id": None,
+            "location.country.state.id": None,
+            "location.country.state.town.info.id": None,
+            "location.country.state.town.info.region": None,
+            "location.country.state.town.info.x": 49.151580810546875,
+            "location.country.state.town.info.y": -33.148521423339844,
+            "location.country.state.town.info.z": 27.572303771972656,
+        }
+        assert result == expected
+    def test_nonetype_multiple_levels(self):
+        """A None-valued "id" at every nesting level is flattened to its own key."""
+        # GH21158: If inner level json has a key with a null value
+        # make sure it does not do a new_d.pop twice and except
+        data = {
+            "id": None,
+            "location": {
+                "id": None,
+                "country": {
+                    "id": None,
+                    "state": {
+                        "id": None,
+                        # This key already contains the "." separator.
+                        "town.info": {
+                            "region": None,
+                            "x": 49.151580810546875,
+                            "y": -33.148521423339844,
+                            "z": 27.572303771972656,
+                        },
+                    },
+                },
+            },
+        }
+        result = nested_to_record(data)
+        expected = {
+            "id": None,
+            "location.id": None,
+            "location.country.id": None,
+            "location.country.state.id": None,
+            "location.country.state.town.info.region": None,
+            "location.country.state.town.info.x": 49.151580810546875,
+            "location.country.state.town.info.y": -33.148521423339844,
+            "location.country.state.town.info.z": 27.572303771972656,
+        }
+        assert result == expected
+    @pytest.mark.parametrize(
+        "max_level, expected",
+        [
+            # max_level=None: every nested dict is flattened into dotted keys.
+            (
+                None,
+                [
+                    {
+                        "CreatedBy.Name": "User001",
+                        "Lookup.TextField": "Some text",
+                        "Lookup.UserField.Id": "ID001",
+                        "Lookup.UserField.Name": "Name001",
+                        "Image.a": "b",
+                    }
+                ],
+            ),
+            # max_level=0: the nested dicts are left untouched.
+            (
+                0,
+                [
+                    {
+                        "CreatedBy": {"Name": "User001"},
+                        "Lookup": {
+                            "TextField": "Some text",
+                            "UserField": {"Id": "ID001", "Name": "Name001"},
+                        },
+                        "Image": {"a": "b"},
+                    }
+                ],
+            ),
+            # max_level=1: one level is flattened; "Lookup.UserField" stays a dict.
+            (
+                1,
+                [
+                    {
+                        "CreatedBy.Name": "User001",
+                        "Lookup.TextField": "Some text",
+                        "Lookup.UserField": {"Id": "ID001", "Name": "Name001"},
+                        "Image.a": "b",
+                    }
+                ],
+            ),
+        ],
+    )
+    def test_with_max_level(self, max_level, expected, max_level_test_input_data):
+        """``nested_to_record`` flattens the fixture data only up to ``max_level``."""
+        # GH23843: Enhanced JSON normalize
+        output = nested_to_record(max_level_test_input_data, max_level=max_level)
+        assert output == expected
+    def test_with_large_max_level(self):
+        """A ``max_level`` of 100 flattens this input completely."""
+        # GH23843: Enhanced JSON normalize
+        max_level = 100
+        input_data = [
+            {
+                "CreatedBy": {
+                    "user": {
+                        "name": {"firstname": "Leo", "LastName": "Thomson"},
+                        "family_tree": {
+                            "father": {
+                                "name": "Father001",
+                                "father": {
+                                    "Name": "Father002",
+                                    "father": {
+                                        "name": "Father003",
+                                        "father": {"Name": "Father004"},
+                                    },
+                                },
+                            }
+                        },
+                    }
+                }
+            }
+        ]
+        # Every leaf of the input appears under its full dotted path.
+        expected = [
+            {
+                "CreatedBy.user.name.firstname": "Leo",
+                "CreatedBy.user.name.LastName": "Thomson",
+                "CreatedBy.user.family_tree.father.name": "Father001",
+                "CreatedBy.user.family_tree.father.father.Name": "Father002",
+                "CreatedBy.user.family_tree.father.father.father.name": "Father003",
+                "CreatedBy.user.family_tree.father.father.father.father.Name": "Father004",  # noqa: E501
+            }
+        ]
+        output = nested_to_record(input_data, max_level=max_level)
+        assert output == expected
+    def test_series_non_zero_index(self):
+        # GH 19020
+        data = {
+            0: {"id": 1, "name": "Foo", "elements": {"a": 1}},
+            1: {"id": 2, "name": "Bar", "elements": {"b": 2}},
+            2: {"id": 3, "name": "Baz", "elements": {"c": 3}},
+        }
+        s = Series(data)
+        s.index = [1, 2, 3]
+        result = json_normalize(s)
+        expected = DataFrame(
+            {
+                "id": [1, 2, 3],
+                "name": ["Foo", "Bar", "Baz"],
+                "elements.a": [1.0, np.nan, np.nan],
+                "elements.b": [np.nan, 2.0, np.nan],
+                "elements.c": [np.nan, np.nan, 3.0],
+            }
+        )
+        tm.assert_frame_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/io/json/test_pandas.py ADDED Viewed

	@@ -0,0 +1,2188 @@

+import datetime
+from datetime import timedelta
+from decimal import Decimal
+from io import (
+    BytesIO,
+    StringIO,
+)
+import json
+import os
+import sys
+import time
+import numpy as np
+import pytest
+from pandas._config import using_string_dtype
+from pandas.compat import IS64
+import pandas.util._test_decorators as td
+import pandas as pd
+from pandas import (
+    NA,
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    RangeIndex,
+    Series,
+    Timestamp,
+    date_range,
+    read_json,
+)
+import pandas._testing as tm
+from pandas.io.json import ujson_dumps
+def test_literal_json_deprecation():
+    # PR 53409
+    expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+    jsonl = """{"a": 1, "b": 2}
+        {"a": 3, "b": 4}
+        {"a": 5, "b": 6}
+        {"a": 7, "b": 8}"""
+    msg = (
+        "Passing literal json to 'read_json' is deprecated and "
+        "will be removed in a future version. To read from a "
+        "literal string, wrap it in a 'StringIO' object."
+    )
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        try:
+            read_json(jsonl, lines=False)
+        except ValueError:
+            pass
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        read_json(expected.to_json(), lines=False)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
+        tm.assert_frame_equal(result, expected)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        try:
+            result = read_json(
+                '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n',
+                lines=False,
+            )
+        except ValueError:
+            pass
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        try:
+            result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=False)
+        except ValueError:
+            pass
+        tm.assert_frame_equal(result, expected)
+def assert_json_roundtrip_equal(result, expected, orient):
+    if orient in ("records", "values"):
+        expected = expected.reset_index(drop=True)
+    if orient == "values":
+        expected.columns = range(len(expected.columns))
+    tm.assert_frame_equal(result, expected)
+class TestPandasContainer:
+    @pytest.fixture
+    def categorical_frame(self):
+        """Return a 30-row frame indexed by a CategoricalIndex named "E".
+
+        Columns "A" to "D" hold standard-normal draws, each from a generator
+        seeded with the column's position; "E" holds the category labels in
+        reverse order and "sort" holds 0..29 as int64.
+        """
+        data = {
+            c: np.random.default_rng(i).standard_normal(30)
+            for i, c in enumerate(list("ABCD"))
+        }
+        # 5 + 5 + 5 + 15 labels, so the index contains duplicates.
+        cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * 15
+        data["E"] = list(reversed(cat))
+        data["sort"] = np.arange(30, dtype="int64")
+        return DataFrame(data, index=pd.CategoricalIndex(cat, name="E"))
+    @pytest.fixture
+    def datetime_series(self):
+        """Return a float64 Series named "ts" over 10 dates from 2020-01-01."""
+        # Same as usual datetime_series, but with index freq set to None,
+        #  since that doesn't round-trip, see GH#33711
+        ser = Series(
+            1.1 * np.arange(10, dtype=np.float64),
+            index=date_range("2020-01-01", periods=10),
+            name="ts",
+        )
+        ser.index = ser.index._with_freq(None)
+        return ser
+    @pytest.fixture
+    def datetime_frame(self):
+        """Return a 30x4 frame of seeded normal draws, columns "A" to "D".
+
+        The index is built with ``freq="B"`` from 2000-01-01 and then has its
+        freq cleared.
+        """
+        # Same as usual datetime_frame, but with index freq set to None,
+        #  since that doesn't round-trip, see GH#33711
+        df = DataFrame(
+            np.random.default_rng(2).standard_normal((30, 4)),
+            columns=Index(list("ABCD")),
+            index=date_range("2000-01-01", periods=30, freq="B"),
+        )
+        df.index = df.index._with_freq(None)
+        return df
+    def test_frame_double_encoded_labels(self, orient):
+        df = DataFrame(
+            [["a", "b"], ["c", "d"]],
+            index=['index " 1', "index / 2"],
+            columns=["a \\ b", "y / z"],
+        )
+        data = StringIO(df.to_json(orient=orient))
+        result = read_json(data, orient=orient)
+        expected = df.copy()
+        assert_json_roundtrip_equal(result, expected, orient)
+    @pytest.mark.parametrize("orient", ["split", "records", "values"])
+    def test_frame_non_unique_index(self, orient):
+        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
+        data = StringIO(df.to_json(orient=orient))
+        result = read_json(data, orient=orient)
+        expected = df.copy()
+        assert_json_roundtrip_equal(result, expected, orient)
+    @pytest.mark.parametrize("orient", ["index", "columns"])
+    def test_frame_non_unique_index_raises(self, orient):
+        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
+        msg = f"DataFrame index must be unique for orient='{orient}'"
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(orient=orient)
+    @pytest.mark.parametrize("orient", ["split", "values"])
+    @pytest.mark.parametrize(
+        "data",
+        [
+            [["a", "b"], ["c", "d"]],
+            [[1.5, 2.5], [3.5, 4.5]],
+            [[1, 2.5], [3, 4.5]],
+            [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
+        ],
+    )
+    def test_frame_non_unique_columns(self, orient, data):
+        """Frames with duplicate column labels round-trip for split and values."""
+        df = DataFrame(data, index=[1, 2], columns=["x", "x"])
+        result = read_json(
+            StringIO(df.to_json(orient=orient)), orient=orient, convert_dates=["x"]
+        )
+        if orient == "values":
+            # The expected frame is built without index or column labels.
+            expected = DataFrame(data)
+            if expected.iloc[:, 0].dtype == "datetime64[ns]":
+                # orient == "values" by default will write Timestamp objects out
+                # in milliseconds; these are internally stored in nanosecond,
+                # so divide to get where we need
+                # TODO: a to_epoch method would also solve; see GH 14772
+                expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000)
+        elif orient == "split":
+            # ``df`` is reused (and relabelled in place) after the round trip
+            # above; the second duplicate label is expected back as "x.1".
+            expected = df
+            expected.columns = ["x", "x.1"]
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize("orient", ["index", "columns", "records"])
+    def test_frame_non_unique_columns_raises(self, orient):
+        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])
+        msg = f"DataFrame columns must be unique for orient='{orient}'"
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(orient=orient)
+    def test_frame_default_orient(self, float_frame):
+        assert float_frame.to_json() == float_frame.to_json(orient="columns")
+    @pytest.mark.parametrize("dtype", [False, float])
+    @pytest.mark.parametrize("convert_axes", [True, False])
+    def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame):
+        data = StringIO(float_frame.to_json(orient=orient))
+        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
+        expected = float_frame
+        assert_json_roundtrip_equal(result, expected, orient)
+    @pytest.mark.parametrize("dtype", [False, np.int64])
+    @pytest.mark.parametrize("convert_axes", [True, False])
+    def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame):
+        data = StringIO(int_frame.to_json(orient=orient))
+        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
+        expected = int_frame
+        assert_json_roundtrip_equal(result, expected, orient)
+    @pytest.mark.parametrize("dtype", [None, np.float64, int, "U3"])
+    @pytest.mark.parametrize("convert_axes", [True, False])
+    def test_roundtrip_str_axes(self, orient, convert_axes, dtype):
+        df = DataFrame(
+            np.zeros((200, 4)),
+            columns=[str(i) for i in range(4)],
+            index=[str(i) for i in range(200)],
+            dtype=dtype,
+        )
+        data = StringIO(df.to_json(orient=orient))
+        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
+        expected = df.copy()
+        if not dtype:
+            expected = expected.astype(np.int64)
+        # index columns, and records orients cannot fully preserve the string
+        # dtype for axes as the index and column labels are used as keys in
+        # JSON objects. JSON keys are by definition strings, so there's no way
+        # to disambiguate whether those keys actually were strings or numeric
+        # beforehand and numeric wins out.
+        if convert_axes and (orient in ("index", "columns")):
+            expected.columns = expected.columns.astype(np.int64)
+            expected.index = expected.index.astype(np.int64)
+        elif orient == "records" and convert_axes:
+            expected.columns = expected.columns.astype(np.int64)
+        elif convert_axes and orient == "split":
+            expected.columns = expected.columns.astype(np.int64)
+        assert_json_roundtrip_equal(result, expected, orient)
+    @pytest.mark.parametrize("convert_axes", [True, False])
+    def test_roundtrip_categorical(
+        self, request, orient, categorical_frame, convert_axes, using_infer_string
+    ):
+        """Round-trip ``categorical_frame``; the index comes back as strings."""
+        # TODO: create a better frame to test with and improve coverage
+        if orient in ("index", "columns"):
+            # The xfail marker must be applied before ``to_json`` is called below.
+            # NOTE(review): the reason string ends with a stray ")".
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason=f"Can't have duplicate index values for orient '{orient}')"
+                )
+            )
+        data = StringIO(categorical_frame.to_json(orient=orient))
+        result = read_json(data, orient=orient, convert_axes=convert_axes)
+        expected = categorical_frame.copy()
+        expected.index = expected.index.astype(
+            str if not using_infer_string else "str"
+        )  # Categorical not preserved
+        expected.index.name = None  # index names aren't preserved in JSON
+        assert_json_roundtrip_equal(result, expected, orient)
+    @pytest.mark.parametrize("convert_axes", [True, False])
+    def test_roundtrip_empty(self, orient, convert_axes):
+        empty_frame = DataFrame()
+        data = StringIO(empty_frame.to_json(orient=orient))
+        result = read_json(data, orient=orient, convert_axes=convert_axes)
+        if orient == "split":
+            idx = Index([], dtype=(float if convert_axes else object))
+            expected = DataFrame(index=idx, columns=idx)
+        elif orient in ["index", "columns"]:
+            expected = DataFrame()
+        else:
+            expected = empty_frame.copy()
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize("convert_axes", [True, False])
+    def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame):
+        """Round-trip ``datetime_frame`` with and without axis conversion."""
+        # TODO: improve coverage with date_format parameter
+        data = StringIO(datetime_frame.to_json(orient=orient))
+        result = read_json(data, orient=orient, convert_axes=convert_axes)
+        expected = datetime_frame.copy()
+        if not convert_axes:  # one off for ts handling
+            # DTI gets converted to epoch values
+            # The int64 view is divided by 1_000_000 to match the epoch values.
+            idx = expected.index.view(np.int64) // 1000000
+            if orient != "split":  # TODO: handle consistently across orients
+                idx = idx.astype(str)
+            expected.index = idx
+        assert_json_roundtrip_equal(result, expected, orient)
+    @pytest.mark.parametrize("convert_axes", [True, False])
+    def test_roundtrip_mixed(self, orient, convert_axes):
+        index = Index(["a", "b", "c", "d", "e"])
+        values = {
+            "A": [0.0, 1.0, 2.0, 3.0, 4.0],
+            "B": [0.0, 1.0, 0.0, 1.0, 0.0],
+            "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
+            "D": [True, False, True, False, True],
+        }
+        df = DataFrame(data=values, index=index)
+        data = StringIO(df.to_json(orient=orient))
+        result = read_json(data, orient=orient, convert_axes=convert_axes)
+        expected = df.copy()
+        expected = expected.assign(**expected.select_dtypes("number").astype(np.int64))
+        assert_json_roundtrip_equal(result, expected, orient)
+    # Marked xfail: the test counts as an expected failure only when it fails
+    # with an AssertionError.
+    @pytest.mark.xfail(
+        reason="#50456 Column multiindex is stored and loaded differently",
+        raises=AssertionError,
+    )
+    @pytest.mark.parametrize(
+        "columns",
+        [
+            [["2022", "2022"], ["JAN", "FEB"]],
+            [["2022", "2023"], ["JAN", "JAN"]],
+            [["2022", "2022"], ["JAN", "JAN"]],
+        ],
+    )
+    def test_roundtrip_multiindex(self, columns):
+        """Round-trip a frame with two-level columns through ``orient="split"``."""
+        df = DataFrame(
+            [[1, 2], [3, 4]],
+            columns=pd.MultiIndex.from_arrays(columns),
+        )
+        data = StringIO(df.to_json(orient="split"))
+        result = read_json(data, orient="split")
+        tm.assert_frame_equal(result, df)
+    @pytest.mark.parametrize(
+        "data,msg,orient",
+        [
+            ('{"key":b:a:d}', "Expected object or value", "columns"),
+            # too few indices
+            (
+                '{"columns":["A","B"],'
+                '"index":["2","3"],'
+                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
+                "|".join(
+                    [
+                        r"Length of values \(3\) does not match length of index \(2\)",
+                    ]
+                ),
+                "split",
+            ),
+            # too many columns
+            (
+                '{"columns":["A","B","C"],'
+                '"index":["1","2","3"],'
+                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
+                "3 columns passed, passed data had 2 columns",
+                "split",
+            ),
+            # bad key
+            (
+                '{"badkey":["A","B"],'
+                '"index":["2","3"],'
+                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
+                r"unexpected key\(s\): badkey",
+                "split",
+            ),
+        ],
+    )
+    def test_frame_from_json_bad_data_raises(self, data, msg, orient):
+        with pytest.raises(ValueError, match=msg):
+            read_json(StringIO(data), orient=orient)
+    @pytest.mark.parametrize("dtype", [True, False])
+    @pytest.mark.parametrize("convert_axes", [True, False])
+    def test_frame_from_json_missing_data(self, orient, convert_axes, dtype):
+        num_df = DataFrame([[1, 2], [4, 5, 6]])
+        result = read_json(
+            StringIO(num_df.to_json(orient=orient)),
+            orient=orient,
+            convert_axes=convert_axes,
+            dtype=dtype,
+        )
+        assert np.isnan(result.iloc[0, 2])
+        obj_df = DataFrame([["1", "2"], ["4", "5", "6"]])
+        result = read_json(
+            StringIO(obj_df.to_json(orient=orient)),
+            orient=orient,
+            convert_axes=convert_axes,
+            dtype=dtype,
+        )
+        assert np.isnan(result.iloc[0, 2])
+    @pytest.mark.parametrize("dtype", [True, False])
+    def test_frame_read_json_dtype_missing_value(self, dtype):
+        # GH28501 Parse missing values using read_json with dtype=False
+        # to NaN instead of None
+        result = read_json(StringIO("[null]"), dtype=dtype)
+        expected = DataFrame([np.nan])
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize("inf", [np.inf, -np.inf])
+    @pytest.mark.parametrize("dtype", [True, False])
+    def test_frame_infinity(self, inf, dtype):
+        # infinities get mapped to nulls which get mapped to NaNs during
+        # deserialisation
+        df = DataFrame([[1, 2], [4, 5, 6]])
+        df.loc[0, 2] = inf
+        data = StringIO(df.to_json())
+        result = read_json(data, dtype=dtype)
+        assert np.isnan(result.iloc[0, 2])
+    @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
+    @pytest.mark.parametrize(
+        "value,precision,expected_val",
+        [
+            (0.95, 1, 1.0),
+            (1.95, 1, 2.0),
+            (-1.95, 1, -2.0),
+            (0.995, 2, 1.0),
+            (0.9995, 3, 1.0),
+            (0.99999999999999944, 15, 1.0),
+        ],
+    )
+    def test_frame_to_json_float_precision(self, value, precision, expected_val):
+        df = DataFrame([{"a_float": value}])
+        encoded = df.to_json(double_precision=precision)
+        assert encoded == f'{{"a_float":{{"0":{expected_val}}}}}'
+    def test_frame_to_json_except(self):
+        df = DataFrame([1, 2, 3])
+        msg = "Invalid value 'garbage' for option 'orient'"
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(orient="garbage")
+    def test_frame_empty(self):
+        df = DataFrame(columns=["jim", "joe"])
+        assert not df._is_mixed_type
+        data = StringIO(df.to_json())
+        result = read_json(data, dtype=dict(df.dtypes))
+        tm.assert_frame_equal(result, df, check_index_type=False)
+    def test_frame_empty_to_json(self):
+        # GH 7445
+        df = DataFrame({"test": []}, index=[])
+        result = df.to_json(orient="columns")
+        expected = '{"test":{}}'
+        assert result == expected
+    def test_frame_empty_mixedtype(self):
+        # mixed type
+        df = DataFrame(columns=["jim", "joe"])
+        df["joe"] = df["joe"].astype("i8")
+        assert df._is_mixed_type
+        data = df.to_json()
+        tm.assert_frame_equal(
+            read_json(StringIO(data), dtype=dict(df.dtypes)),
+            df,
+            check_index_type=False,
+        )
+    def test_frame_mixedtype_orient(self):  # GH10289
+        vals = [
+            [10, 1, "foo", 0.1, 0.01],
+            [20, 2, "bar", 0.2, 0.02],
+            [30, 3, "baz", 0.3, 0.03],
+            [40, 4, "qux", 0.4, 0.04],
+        ]
+        df = DataFrame(
+            vals, index=list("abcd"), columns=["1st", "2nd", "3rd", "4th", "5th"]
+        )
+        assert df._is_mixed_type
+        right = df.copy()
+        for orient in ["split", "index", "columns"]:
+            inp = StringIO(df.to_json(orient=orient))
+            left = read_json(inp, orient=orient, convert_axes=False)
+            tm.assert_frame_equal(left, right)
+        right.index = RangeIndex(len(df))
+        inp = StringIO(df.to_json(orient="records"))
+        left = read_json(inp, orient="records", convert_axes=False)
+        tm.assert_frame_equal(left, right)
+        right.columns = RangeIndex(df.shape[1])
+        inp = StringIO(df.to_json(orient="values"))
+        left = read_json(inp, orient="values", convert_axes=False)
+        tm.assert_frame_equal(left, right)
+    def test_v12_compat(self, datapath):
+        dti = date_range("2000-01-03", "2000-01-07")
+        # freq doesn't roundtrip
+        dti = DatetimeIndex(np.asarray(dti), freq=None)
+        df = DataFrame(
+            [
+                [1.56808523, 0.65727391, 1.81021139, -0.17251653],
+                [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
+                [1.51493992, 0.11805825, 1.629455, -1.31506612],
+                [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
+                [0.05951614, -2.69652057, 1.28163262, 0.34703478],
+            ],
+            columns=["A", "B", "C", "D"],
+            index=dti,
+        )
+        df["date"] = Timestamp("19920106 18:21:32.12").as_unit("ns")
+        df.iloc[3, df.columns.get_loc("date")] = Timestamp("20130101")
+        df["modified"] = df["date"]
+        df.iloc[1, df.columns.get_loc("modified")] = pd.NaT
+        dirpath = datapath("io", "json", "data")
+        v12_json = os.path.join(dirpath, "tsframe_v012.json")
+        df_unser = read_json(v12_json)
+        tm.assert_frame_equal(df, df_unser)
+        df_iso = df.drop(["modified"], axis=1)
+        v12_iso_json = os.path.join(dirpath, "tsframe_iso_v012.json")
+        df_unser_iso = read_json(v12_iso_json)
+        tm.assert_frame_equal(df_iso, df_unser_iso, check_column_type=False)
+    def test_blocks_compat_GH9037(self, using_infer_string):
+        """Round-trip a frame of float, int and string columns via ``orient="split"``.
+        The result is compared with exact values, index/column type checks and
+        ``by_blocks=True``.
+        """
+        index = date_range("20000101", periods=10, freq="h")
+        # freq doesn't round-trip
+        index = DatetimeIndex(list(index), freq=None)
+        # Two columns each of float, int and str, interleaved by dtype.
+        df_mixed = DataFrame(
+            {
+                "float_1": [
+                    -0.92077639,
+                    0.77434435,
+                    1.25234727,
+                    0.61485564,
+                    -0.60316077,
+                    0.24653374,
+                    0.28668979,
+                    -2.51969012,
+                    0.95748401,
+                    -1.02970536,
+                ],
+                "int_1": [
+                    19680418,
+                    75337055,
+                    99973684,
+                    65103179,
+                    79373900,
+                    40314334,
+                    21290235,
+                    4991321,
+                    41903419,
+                    16008365,
+                ],
+                "str_1": [
+                    "78c608f1",
+                    "64a99743",
+                    "13d2ff52",
+                    "ca7f4af2",
+                    "97236474",
+                    "bde7e214",
+                    "1a6bde47",
+                    "b1190be5",
+                    "7a669144",
+                    "8d64d068",
+                ],
+                "float_2": [
+                    -0.0428278,
+                    -1.80872357,
+                    3.36042349,
+                    -0.7573685,
+                    -0.48217572,
+                    0.86229683,
+                    1.08935819,
+                    0.93898739,
+                    -0.03030452,
+                    1.43366348,
+                ],
+                "str_2": [
+                    "14f04af9",
+                    "d085da90",
+                    "4bcfac83",
+                    "81504caf",
+                    "2ffef4a9",
+                    "08e2f5c4",
+                    "07e1af03",
+                    "addbd4a7",
+                    "1f6a09ba",
+                    "4bfc4d87",
+                ],
+                "int_2": [
+                    86967717,
+                    98098830,
+                    51927505,
+                    20372254,
+                    12601730,
+                    20884027,
+                    34193846,
+                    10561746,
+                    24867120,
+                    76131025,
+                ],
+            },
+            index=index,
+        )
+        # JSON deserialisation always creates unicode strings
+        df_mixed.columns = df_mixed.columns.astype(
+            np.str_ if not using_infer_string else "str"
+        )
+        data = StringIO(df_mixed.to_json(orient="split"))
+        df_roundtrip = read_json(data, orient="split")
+        # Strict comparison: exact values, index/column types, block by block.
+        tm.assert_frame_equal(
+            df_mixed,
+            df_roundtrip,
+            check_index_type=True,
+            check_column_type=True,
+            by_blocks=True,
+            check_exact=True,
+        )
+    def test_frame_nonprintable_bytes(self):
+        # GH14256: failing column caused segfaults, if it is not the last one
+        class BinaryThing:
+            def __init__(self, hexed) -> None:
+                self.hexed = hexed
+                self.binary = bytes.fromhex(hexed)
+            def __str__(self) -> str:
+                return self.hexed
+        hexed = "574b4454ba8c5eb4f98a8f45"
+        binthing = BinaryThing(hexed)
+        # verify the proper conversion of printable content
+        df_printable = DataFrame({"A": [binthing.hexed]})
+        assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}'
+        # check if non-printable content throws appropriate Exception
+        df_nonprintable = DataFrame({"A": [binthing]})
+        msg = "Unsupported UTF-8 sequence length when encoding string"
+        with pytest.raises(OverflowError, match=msg):
+            df_nonprintable.to_json()
+        # the same with multiple columns threw segfaults
+        df_mixed = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"])
+        with pytest.raises(OverflowError, match=msg):
+            df_mixed.to_json()
+        # default_handler should resolve exceptions for non-string types
+        result = df_nonprintable.to_json(default_handler=str)
+        expected = f'{{"A":{{"0":"{hexed}"}}}}'
+        assert result == expected
+        assert (
+            df_mixed.to_json(default_handler=str)
+            == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}'
+        )
+    def test_label_overflow(self):
+        # GH14256: buffer length not checked when writing label
+        result = DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json()
+        expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}'
+        assert result == expected
+    def test_series_non_unique_index(self):
+        s = Series(["a", "b"], index=[1, 1])
+        msg = "Series index must be unique for orient='index'"
+        with pytest.raises(ValueError, match=msg):
+            s.to_json(orient="index")
+        tm.assert_series_equal(
+            s,
+            read_json(
+                StringIO(s.to_json(orient="split")), orient="split", typ="series"
+            ),
+        )
+        unserialized = read_json(
+            StringIO(s.to_json(orient="records")), orient="records", typ="series"
+        )
+        tm.assert_equal(s.values, unserialized.values)
+    def test_series_default_orient(self, string_series):
+        assert string_series.to_json() == string_series.to_json(orient="index")
+    def test_series_roundtrip_simple(self, orient, string_series, using_infer_string):
+        data = StringIO(string_series.to_json(orient=orient))
+        result = read_json(data, typ="series", orient=orient)
+        expected = string_series
+        if using_infer_string and orient in ("split", "index", "columns"):
+            # These schemas don't contain dtypes, so we infer string
+            expected.index = expected.index.astype("str")
+        if orient in ("values", "records"):
+            expected = expected.reset_index(drop=True)
+        if orient != "split":
+            expected.name = None
+        tm.assert_series_equal(result, expected)
+    @pytest.mark.parametrize("dtype", [False, None])
+    def test_series_roundtrip_object(self, orient, dtype, object_series):
+        data = StringIO(object_series.to_json(orient=orient))
+        result = read_json(data, typ="series", orient=orient, dtype=dtype)
+        expected = object_series
+        if orient in ("values", "records"):
+            expected = expected.reset_index(drop=True)
+        if orient != "split":
+            expected.name = None
+        if using_string_dtype():
+            expected = expected.astype("str")
+        tm.assert_series_equal(result, expected)
+    def test_series_roundtrip_empty(self, orient):
+        empty_series = Series([], index=[], dtype=np.float64)
+        data = StringIO(empty_series.to_json(orient=orient))
+        result = read_json(data, typ="series", orient=orient)
+        expected = empty_series.reset_index(drop=True)
+        if orient in ("split"):
+            expected.index = expected.index.astype(np.float64)
+        tm.assert_series_equal(result, expected)
+    def test_series_roundtrip_timeseries(self, orient, datetime_series):
+        data = StringIO(datetime_series.to_json(orient=orient))
+        result = read_json(data, typ="series", orient=orient)
+        expected = datetime_series
+        if orient in ("values", "records"):
+            expected = expected.reset_index(drop=True)
+        if orient != "split":
+            expected.name = None
+        tm.assert_series_equal(result, expected)
+    @pytest.mark.parametrize("dtype", [np.float64, int])
+    def test_series_roundtrip_numeric(self, orient, dtype):
+        s = Series(range(6), index=["a", "b", "c", "d", "e", "f"])
+        data = StringIO(s.to_json(orient=orient))
+        result = read_json(data, typ="series", orient=orient)
+        expected = s.copy()
+        if orient in ("values", "records"):
+            expected = expected.reset_index(drop=True)
+        tm.assert_series_equal(result, expected)
+    def test_series_to_json_except(self):
+        s = Series([1, 2, 3])
+        msg = "Invalid value 'garbage' for option 'orient'"
+        with pytest.raises(ValueError, match=msg):
+            s.to_json(orient="garbage")
+    def test_series_from_json_precise_float(self):
+        s = Series([4.56, 4.56, 4.56])
+        result = read_json(StringIO(s.to_json()), typ="series", precise_float=True)
+        tm.assert_series_equal(result, s, check_index_type=False)
+    def test_series_with_dtype(self):
+        # GH 21986
+        s = Series([4.56, 4.56, 4.56])
+        result = read_json(StringIO(s.to_json()), typ="series", dtype=np.int64)
+        expected = Series([4] * 3)
+        tm.assert_series_equal(result, expected)
+    @pytest.mark.parametrize(
+        "dtype,expected",
+        [
+            (True, Series(["2000-01-01"], dtype="datetime64[ns]")),
+            (False, Series([946684800000])),
+        ],
+    )
+    def test_series_with_dtype_datetime(self, dtype, expected):
+        s = Series(["2000-01-01"], dtype="datetime64[ns]")
+        data = StringIO(s.to_json())
+        result = read_json(data, typ="series", dtype=dtype)
+        tm.assert_series_equal(result, expected)
+    def test_frame_from_json_precise_float(self):
+        df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
+        result = read_json(StringIO(df.to_json()), precise_float=True)
+        tm.assert_frame_equal(result, df)
+    def test_typ(self):
+        s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64")
+        result = read_json(StringIO(s.to_json()), typ=None)
+        tm.assert_series_equal(result, s)
+    def test_reconstruction_index(self):
+        df = DataFrame([[1, 2, 3], [4, 5, 6]])
+        result = read_json(StringIO(df.to_json()))
+        tm.assert_frame_equal(result, df)
+        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
+        result = read_json(StringIO(df.to_json()))
+        tm.assert_frame_equal(result, df)
+    def test_path(self, float_frame, int_frame, datetime_frame):
+        with tm.ensure_clean("test.json") as path:
+            for df in [float_frame, int_frame, datetime_frame]:
+                df.to_json(path)
+                read_json(path)
+    def test_axis_dates(self, datetime_series, datetime_frame):
+        # frame
+        json = StringIO(datetime_frame.to_json())
+        result = read_json(json)
+        tm.assert_frame_equal(result, datetime_frame)
+        # series
+        json = StringIO(datetime_series.to_json())
+        result = read_json(json, typ="series")
+        tm.assert_series_equal(result, datetime_series, check_names=False)
+        assert result.name is None
+    def test_convert_dates(self, datetime_series, datetime_frame):
+        # frame
+        df = datetime_frame
+        df["date"] = Timestamp("20130101").as_unit("ns")
+        json = StringIO(df.to_json())
+        result = read_json(json)
+        tm.assert_frame_equal(result, df)
+        df["foo"] = 1.0
+        json = StringIO(df.to_json(date_unit="ns"))
+        result = read_json(json, convert_dates=False)
+        expected = df.copy()
+        expected["date"] = expected["date"].values.view("i8")
+        expected["foo"] = expected["foo"].astype("int64")
+        tm.assert_frame_equal(result, expected)
+        # series
+        ts = Series(Timestamp("20130101").as_unit("ns"), index=datetime_series.index)
+        json = StringIO(ts.to_json())
+        result = read_json(json, typ="series")
+        tm.assert_series_equal(result, ts)
+    @pytest.mark.parametrize("date_format", ["epoch", "iso"])
+    @pytest.mark.parametrize("as_object", [True, False])
+    @pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, Timestamp])
+    def test_date_index_and_values(self, date_format, as_object, date_typ):
+        data = [date_typ(year=2020, month=1, day=1), pd.NaT]
+        if as_object:
+            data.append("a")
+        ser = Series(data, index=data)
+        result = ser.to_json(date_format=date_format)
+        if date_format == "epoch":
+            expected = '{"1577836800000":1577836800000,"null":null}'
+        else:
+            expected = (
+                '{"2020-01-01T00:00:00.000":"2020-01-01T00:00:00.000","null":null}'
+            )
+        if as_object:
+            expected = expected.replace("}", ',"a":"a"}')
+        assert result == expected
+    @pytest.mark.parametrize(
+        "infer_word",
+        [
+            "trade_time",
+            "date",
+            "datetime",
+            "sold_at",
+            "modified",
+            "timestamp",
+            "timestamps",
+        ],
+    )
+    def test_convert_dates_infer(self, infer_word):
+        # GH10747
+        data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
+        expected = DataFrame(
+            [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
+        )
+        result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]]
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize(
+        "date,date_unit",
+        [
+            ("20130101 20:43:42.123", None),
+            ("20130101 20:43:42", "s"),
+            ("20130101 20:43:42.123", "ms"),
+            ("20130101 20:43:42.123456", "us"),
+            ("20130101 20:43:42.123456789", "ns"),
+        ],
+    )
+    def test_date_format_frame(self, date, date_unit, datetime_frame):
+        df = datetime_frame
+        df["date"] = Timestamp(date).as_unit("ns")
+        df.iloc[1, df.columns.get_loc("date")] = pd.NaT
+        df.iloc[5, df.columns.get_loc("date")] = pd.NaT
+        if date_unit:
+            json = df.to_json(date_format="iso", date_unit=date_unit)
+        else:
+            json = df.to_json(date_format="iso")
+        result = read_json(StringIO(json))
+        expected = df.copy()
+        tm.assert_frame_equal(result, expected)
+    def test_date_format_frame_raises(self, datetime_frame):
+        df = datetime_frame
+        msg = "Invalid value 'foo' for option 'date_unit'"
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(date_format="iso", date_unit="foo")
+    @pytest.mark.parametrize(
+        "date,date_unit",
+        [
+            ("20130101 20:43:42.123", None),
+            ("20130101 20:43:42", "s"),
+            ("20130101 20:43:42.123", "ms"),
+            ("20130101 20:43:42.123456", "us"),
+            ("20130101 20:43:42.123456789", "ns"),
+        ],
+    )
+    def test_date_format_series(self, date, date_unit, datetime_series):
+        ts = Series(Timestamp(date).as_unit("ns"), index=datetime_series.index)
+        ts.iloc[1] = pd.NaT
+        ts.iloc[5] = pd.NaT
+        if date_unit:
+            json = ts.to_json(date_format="iso", date_unit=date_unit)
+        else:
+            json = ts.to_json(date_format="iso")
+        result = read_json(StringIO(json), typ="series")
+        expected = ts.copy()
+        tm.assert_series_equal(result, expected)
+    def test_date_format_series_raises(self, datetime_series):
+        ts = Series(Timestamp("20130101 20:43:42.123"), index=datetime_series.index)
+        msg = "Invalid value 'foo' for option 'date_unit'"
+        with pytest.raises(ValueError, match=msg):
+            ts.to_json(date_format="iso", date_unit="foo")
+    @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
+    def test_date_unit(self, unit, datetime_frame):
+        df = datetime_frame
+        df["date"] = Timestamp("20130101 20:43:42").as_unit("ns")
+        dl = df.columns.get_loc("date")
+        df.iloc[1, dl] = Timestamp("19710101 20:43:42")
+        df.iloc[2, dl] = Timestamp("21460101 20:43:42")
+        df.iloc[4, dl] = pd.NaT
+        json = df.to_json(date_format="epoch", date_unit=unit)
+        # force date unit
+        result = read_json(StringIO(json), date_unit=unit)
+        tm.assert_frame_equal(result, df)
+        # detect date unit
+        result = read_json(StringIO(json), date_unit=None)
+        tm.assert_frame_equal(result, df)
+    @pytest.mark.parametrize("unit", ["s", "ms", "us"])
+    def test_iso_non_nano_datetimes(self, unit):
+        # Test that numpy datetimes
+        # in an Index or a column with non-nano resolution can be serialized
+        # correctly
+        # GH53686
+        index = DatetimeIndex(
+            [np.datetime64("2023-01-01T11:22:33.123456", unit)],
+            dtype=f"datetime64[{unit}]",
+        )
+        df = DataFrame(
+            {
+                "date": Series(
+                    [np.datetime64("2022-01-01T11:22:33.123456", unit)],
+                    dtype=f"datetime64[{unit}]",
+                    index=index,
+                ),
+                "date_obj": Series(
+                    [np.datetime64("2023-01-01T11:22:33.123456", unit)],
+                    dtype=object,
+                    index=index,
+                ),
+            },
+        )
+        buf = StringIO()
+        df.to_json(buf, date_format="iso", date_unit=unit)
+        buf.seek(0)
+        # read_json always reads datetimes in nanosecond resolution
+        # TODO: check_dtype/check_index_type should be removable
+        # once read_json gets non-nano support
+        tm.assert_frame_equal(
+            read_json(buf, convert_dates=["date", "date_obj"]),
+            df,
+            check_index_type=False,
+            check_dtype=False,
+        )
+    def test_weird_nested_json(self):
+        """Parse a nested JSON document; the test passes if parsing does not crash."""
+        # this used to core dump the parser
+        s = r"""{
+        "status": "success",
+        "data": {
+        "posts": [
+            {
+            "id": 1,
+            "title": "A blog post",
+            "body": "Some useful content"
+            },
+            {
+            "id": 2,
+            "title": "Another blog post",
+            "body": "More content"
+            }
+           ]
+          }
+        }"""
+        # No assertion on the result: only the read_json call is exercised.
+        read_json(StringIO(s))
+    def test_doc_example(self):
+        dfj2 = DataFrame(
+            np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB")
+        )
+        dfj2["date"] = Timestamp("20130101")
+        dfj2["ints"] = range(5)
+        dfj2["bools"] = True
+        dfj2.index = date_range("20130101", periods=5)
+        json = StringIO(dfj2.to_json())
+        result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
+        tm.assert_frame_equal(result, result)
+    def test_round_trip_exception(self, datapath):
+        # GH 3867
+        path = datapath("io", "json", "data", "teams.csv")
+        df = pd.read_csv(path)
+        s = df.to_json()
+        result = read_json(StringIO(s))
+        res = result.reindex(index=df.index, columns=df.columns)
+        msg = "The 'downcast' keyword in fillna is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            res = res.fillna(np.nan, downcast=False)
+        tm.assert_frame_equal(res, df)
+    @pytest.mark.network
+    @pytest.mark.single_cpu
+    @pytest.mark.parametrize(
+        "field,dtype",
+        [
+            ["created_at", pd.DatetimeTZDtype(tz="UTC")],
+            ["closed_at", "datetime64[ns]"],
+            ["updated_at", pd.DatetimeTZDtype(tz="UTC")],
+        ],
+    )
+    def test_url(self, field, dtype, httpserver):
+        data = '{"created_at": ["2023-06-23T18:21:36Z"], "closed_at": ["2023-06-23T18:21:36"], "updated_at": ["2023-06-23T18:21:36Z"]}\n'  # noqa: E501
+        httpserver.serve_content(content=data)
+        result = read_json(httpserver.url, convert_dates=True)
+        assert result[field].dtype == dtype
+    def test_timedelta(self):
+        converter = lambda x: pd.to_timedelta(x, unit="ms")
+        ser = Series([timedelta(23), timedelta(seconds=5)])
+        assert ser.dtype == "timedelta64[ns]"
+        result = read_json(StringIO(ser.to_json()), typ="series").apply(converter)
+        tm.assert_series_equal(result, ser)
+        ser = Series([timedelta(23), timedelta(seconds=5)], index=Index([0, 1]))
+        assert ser.dtype == "timedelta64[ns]"
+        result = read_json(StringIO(ser.to_json()), typ="series").apply(converter)
+        tm.assert_series_equal(result, ser)
+        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
+        assert frame[0].dtype == "timedelta64[ns]"
+        tm.assert_frame_equal(
+            frame, read_json(StringIO(frame.to_json())).apply(converter)
+        )
+    def test_timedelta2(self):
+        frame = DataFrame(
+            {
+                "a": [timedelta(days=23), timedelta(seconds=5)],
+                "b": [1, 2],
+                "c": date_range(start="20130101", periods=2),
+            }
+        )
+        data = StringIO(frame.to_json(date_unit="ns"))
+        result = read_json(data)
+        result["a"] = pd.to_timedelta(result.a, unit="ns")
+        result["c"] = pd.to_datetime(result.c)
+        tm.assert_frame_equal(frame, result)
+    def test_mixed_timedelta_datetime(self):
+        td = timedelta(23)
+        ts = Timestamp("20130101")
+        frame = DataFrame({"a": [td, ts]}, dtype=object)
+        expected = DataFrame(
+            {"a": [pd.Timedelta(td).as_unit("ns")._value, ts.as_unit("ns")._value]}
+        )
+        data = StringIO(frame.to_json(date_unit="ns"))
+        result = read_json(data, dtype={"a": "int64"})
+        tm.assert_frame_equal(result, expected, check_index_type=False)
+    @pytest.mark.parametrize("as_object", [True, False])
+    @pytest.mark.parametrize("date_format", ["iso", "epoch"])
+    @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
+    def test_timedelta_to_json(self, as_object, date_format, timedelta_typ):
+        # GH28156: to_json not correctly formatting Timedelta
+        data = [timedelta_typ(days=1), timedelta_typ(days=2), pd.NaT]
+        if as_object:
+            data.append("a")
+        ser = Series(data, index=data)
+        if date_format == "iso":
+            expected = (
+                '{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}'
+            )
+        else:
+            expected = '{"86400000":86400000,"172800000":172800000,"null":null}'
+        if as_object:
+            expected = expected.replace("}", ',"a":"a"}')
+        result = ser.to_json(date_format=date_format)
+        assert result == expected
+    @pytest.mark.parametrize("as_object", [True, False])
+    @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
+    def test_timedelta_to_json_fractional_precision(self, as_object, timedelta_typ):
+        data = [timedelta_typ(milliseconds=42)]
+        ser = Series(data, index=data)
+        if as_object:
+            ser = ser.astype(object)
+        result = ser.to_json()
+        expected = '{"42":42}'
+        assert result == expected
+    def test_default_handler(self):
+        value = object()
+        frame = DataFrame({"a": [7, value]})
+        expected = DataFrame({"a": [7, str(value)]})
+        result = read_json(StringIO(frame.to_json(default_handler=str)))
+        tm.assert_frame_equal(expected, result, check_index_type=False)
+    def test_default_handler_indirect(self):
+        def default(obj):
+            if isinstance(obj, complex):
+                return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
+            return str(obj)
+        df_list = [
+            9,
+            DataFrame(
+                {"a": [1, "STR", complex(4, -5)], "b": [float("nan"), None, "N/A"]},
+                columns=["a", "b"],
+            ),
+        ]
+        expected = (
+            '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
+            '["re",4.0],["im",-5.0]],"N\\/A"]]]'
+        )
+        assert (
+            ujson_dumps(df_list, default_handler=default, orient="values") == expected
+        )
+    def test_default_handler_numpy_unsupported_dtype(self):
+        # GH12554 to_json raises 'Unhandled numpy dtype 15'
+        df = DataFrame(
+            {"a": [1, 2.3, complex(4, -5)], "b": [float("nan"), None, complex(1.2, 0)]},
+            columns=["a", "b"],
+        )
+        expected = (
+            '[["(1+0j)","(nan+0j)"],'
+            '["(2.3+0j)","(nan+0j)"],'
+            '["(4-5j)","(1.2+0j)"]]'
+        )
+        assert df.to_json(default_handler=str, orient="values") == expected
+    def test_default_handler_raises(self):
+        msg = "raisin"
+        def my_handler_raises(obj):
+            raise TypeError(msg)
+        with pytest.raises(TypeError, match=msg):
+            DataFrame({"a": [1, 2, object()]}).to_json(
+                default_handler=my_handler_raises
+            )
+        with pytest.raises(TypeError, match=msg):
+            DataFrame({"a": [1, 2, complex(4, -5)]}).to_json(
+                default_handler=my_handler_raises
+            )
+    def test_categorical(self):
+        # GH4377 df.to_json segfaults with non-ndarray blocks
+        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
+        df["B"] = df["A"]
+        expected = df.to_json()
+        df["B"] = df["A"].astype("category")
+        assert expected == df.to_json()
+        s = df["A"]
+        sc = df["B"]
+        assert s.to_json() == sc.to_json()
+    def test_datetime_tz(self):
+        # GH4377 df.to_json segfaults with non-ndarray blocks
+        tz_range = date_range("20130101", periods=3, tz="US/Eastern")
+        tz_naive = tz_range.tz_convert("utc").tz_localize(None)
+        df = DataFrame({"A": tz_range, "B": date_range("20130101", periods=3)})
+        df_naive = df.copy()
+        df_naive["A"] = tz_naive
+        expected = df_naive.to_json()
+        assert expected == df.to_json()
+        stz = Series(tz_range)
+        s_naive = Series(tz_naive)
+        assert stz.to_json() == s_naive.to_json()
+    def test_sparse(self):
+        # GH4377 df.to_json segfaults with non-ndarray blocks
+        df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
+        df.loc[:8] = np.nan
+        sdf = df.astype("Sparse")
+        expected = df.to_json()
+        assert expected == sdf.to_json()
+        s = Series(np.random.default_rng(2).standard_normal(10))
+        s.loc[:8] = np.nan
+        ss = s.astype("Sparse")
+        expected = s.to_json()
+        assert expected == ss.to_json()
+    @pytest.mark.parametrize(
+        "ts",
+        [
+            Timestamp("2013-01-10 05:00:00Z"),
+            Timestamp("2013-01-10 00:00:00", tz="US/Eastern"),
+            Timestamp("2013-01-10 00:00:00-0500"),
+        ],
+    )
+    def test_tz_is_utc(self, ts):
+        exp = '"2013-01-10T05:00:00.000Z"'
+        assert ujson_dumps(ts, iso_dates=True) == exp
+        dt = ts.to_pydatetime()
+        assert ujson_dumps(dt, iso_dates=True) == exp
+    def test_tz_is_naive(self):
+        ts = Timestamp("2013-01-10 05:00:00")
+        exp = '"2013-01-10T05:00:00.000"'
+        assert ujson_dumps(ts, iso_dates=True) == exp
+        dt = ts.to_pydatetime()
+        assert ujson_dumps(dt, iso_dates=True) == exp
+    @pytest.mark.parametrize(
+        "tz_range",
+        [
+            date_range("2013-01-01 05:00:00Z", periods=2),
+            date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"),
+            date_range("2013-01-01 00:00:00-0500", periods=2),
+        ],
+    )
+    def test_tz_range_is_utc(self, tz_range):
+        exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
+        dfexp = (
+            '{"DT":{'
+            '"0":"2013-01-01T05:00:00.000Z",'
+            '"1":"2013-01-02T05:00:00.000Z"}}'
+        )
+        assert ujson_dumps(tz_range, iso_dates=True) == exp
+        dti = DatetimeIndex(tz_range)
+        # Ensure datetimes in object array are serialized correctly
+        # in addition to the normal DTI case
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
+        df = DataFrame({"DT": dti})
+        result = ujson_dumps(df, iso_dates=True)
+        assert result == dfexp
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
+    def test_tz_range_is_naive(self):
+        dti = date_range("2013-01-01 05:00:00", periods=2)
+        exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
+        dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'
+        # Ensure datetimes in object array are serialized correctly
+        # in addition to the normal DTI case
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
+        df = DataFrame({"DT": dti})
+        result = ujson_dumps(df, iso_dates=True)
+        assert result == dfexp
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
+    def test_read_inline_jsonl(self):
+        # GH9180
+        result = read_json(StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n'), lines=True)
+        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.single_cpu
+    @td.skip_if_not_us_locale
+    def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so):
+        # GH17200
+        result = read_json(
+            f"s3n://{s3_public_bucket_with_data.name}/items.jsonl",
+            lines=True,
+            storage_options=s3so,
+        )
+        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+    def test_read_local_jsonl(self):
+        # GH17200
+        with tm.ensure_clean("tmp_items.json") as path:
+            with open(path, "w", encoding="utf-8") as infile:
+                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
+            result = read_json(path, lines=True)
+            expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+            tm.assert_frame_equal(result, expected)
+    def test_read_jsonl_unicode_chars(self):
+        # GH15132: non-ascii unicode characters
+        # \u201d == RIGHT DOUBLE QUOTATION MARK
+        # simulate file handle
+        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
+        json = StringIO(json)
+        result = read_json(json, lines=True)
+        expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+        # simulate string
+        json = StringIO('{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n')
+        result = read_json(json, lines=True)
+        expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)])
+    def test_to_json_large_numbers(self, bigNum):
+        # GH34473
+        series = Series(bigNum, dtype=object, index=["articleId"])
+        json = series.to_json()
+        expected = '{"articleId":' + str(bigNum) + "}"
+        assert json == expected
+        df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
+        json = df.to_json()
+        expected = '{"0":{"articleId":' + str(bigNum) + "}}"
+        assert json == expected
+    @pytest.mark.parametrize("bigNum", [-(2**63) - 1, 2**64])
+    def test_read_json_large_numbers(self, bigNum):
+        # GH20599, 26068
+        json = StringIO('{"articleId":' + str(bigNum) + "}")
+        msg = r"Value is too small|Value is too big"
+        with pytest.raises(ValueError, match=msg):
+            read_json(json)
+        json = StringIO('{"0":{"articleId":' + str(bigNum) + "}}")
+        with pytest.raises(ValueError, match=msg):
+            read_json(json)
+    def test_read_json_large_numbers2(self):
+        # GH18842
+        json = '{"articleId": "1404366058080022500245"}'
+        json = StringIO(json)
+        result = read_json(json, typ="series")
+        expected = Series(1.404366e21, index=["articleId"])
+        tm.assert_series_equal(result, expected)
+        json = '{"0": {"articleId": "1404366058080022500245"}}'
+        json = StringIO(json)
+        result = read_json(json)
+        expected = DataFrame(1.404366e21, index=["articleId"], columns=[0])
+        tm.assert_frame_equal(result, expected)
+    def test_to_jsonl(self):
+        # GH9180
+        df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+        result = df.to_json(orient="records", lines=True)
+        expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
+        assert result == expected
+        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
+        result = df.to_json(orient="records", lines=True)
+        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
+        assert result == expected
+        tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)
+        # GH15096: escaped characters in columns and data
+        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
+        result = df.to_json(orient="records", lines=True)
+        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
+        assert result == expected
+        tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)
+    # TODO: there is a near-identical test for pytables; can we share?
+    @pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError)
+    @pytest.mark.parametrize(
+        "val",
+        [
+            [b"E\xc9, 17", b"", b"a", b"b", b"c"],
+            [b"E\xc9, 17", b"a", b"b", b"c"],
+            [b"EE, 17", b"", b"a", b"b", b"c"],
+            [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
+            [b"", b"a", b"b", b"c"],
+            [b"\xf8\xfc", b"a", b"b", b"c"],
+            [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
+            [np.nan, b"", b"b", b"c"],
+            [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
+        ],
+    )
+    @pytest.mark.parametrize("dtype", ["category", object])
+    def test_latin_encoding(self, dtype, val):
+        # GH 13774
+        ser = Series(
+            [x.decode("latin-1") if isinstance(x, bytes) else x for x in val],
+            dtype=dtype,
+        )
+        encoding = "latin-1"
+        with tm.ensure_clean("test.json") as path:
+            ser.to_json(path, encoding=encoding)
+            retr = read_json(StringIO(path), encoding=encoding)
+            tm.assert_series_equal(ser, retr, check_categorical=False)
+    def test_data_frame_size_after_to_json(self):
+        # GH15344
+        df = DataFrame({"a": [str(1)]})
+        size_before = df.memory_usage(index=True, deep=True).sum()
+        df.to_json()
+        size_after = df.memory_usage(index=True, deep=True).sum()
+        assert size_before == size_after
+    @pytest.mark.parametrize(
+        "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]]
+    )
+    @pytest.mark.parametrize("columns", [["a", "b"], ["1", "2"], ["1.", "2."]])
+    def test_from_json_to_json_table_index_and_columns(self, index, columns):
+        # GH25433 GH25435
+        expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns)
+        dfjson = expected.to_json(orient="table")
+        result = read_json(StringIO(dfjson), orient="table")
+        tm.assert_frame_equal(result, expected)
+    def test_from_json_to_json_table_dtypes(self):
+        # GH21345
+        expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
+        dfjson = expected.to_json(orient="table")
+        result = read_json(StringIO(dfjson), orient="table")
+        tm.assert_frame_equal(result, expected)
+    # TODO: We are casting to string which coerces None to NaN before casting back
+    # to object, ending up with incorrect na values
+    @pytest.mark.xfail(using_string_dtype(), reason="incorrect na conversion")
+    @pytest.mark.parametrize("orient", ["split", "records", "index", "columns"])
+    def test_to_json_from_json_columns_dtypes(self, orient):
+        # GH21892 GH33205
+        expected = DataFrame.from_dict(
+            {
+                "Integer": Series([1, 2, 3], dtype="int64"),
+                "Float": Series([None, 2.0, 3.0], dtype="float64"),
+                "Object": Series([None, "", "c"], dtype="object"),
+                "Bool": Series([True, False, True], dtype="bool"),
+                "Category": Series(["a", "b", None], dtype="category"),
+                "Datetime": Series(
+                    ["2020-01-01", None, "2020-01-03"], dtype="datetime64[ns]"
+                ),
+            }
+        )
+        dfjson = expected.to_json(orient=orient)
+        result = read_json(
+            StringIO(dfjson),
+            orient=orient,
+            dtype={
+                "Integer": "int64",
+                "Float": "float64",
+                "Object": "object",
+                "Bool": "bool",
+                "Category": "category",
+                "Datetime": "datetime64[ns]",
+            },
+        )
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
+    def test_read_json_table_dtype_raises(self, dtype):
+        # GH21345
+        df = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
+        dfjson = df.to_json(orient="table")
+        msg = "cannot pass both dtype and orient='table'"
+        with pytest.raises(ValueError, match=msg):
+            read_json(dfjson, orient="table", dtype=dtype)
+    @pytest.mark.parametrize("orient", ["index", "columns", "records", "values"])
+    def test_read_json_table_empty_axes_dtype(self, orient):
+        # GH28558
+        expected = DataFrame()
+        result = read_json(StringIO("{}"), orient=orient, convert_axes=True)
+        tm.assert_index_equal(result.index, expected.index)
+        tm.assert_index_equal(result.columns, expected.columns)
+    def test_read_json_table_convert_axes_raises(self):
+        # GH25433 GH25435
+        df = DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=["1.", "2."])
+        dfjson = df.to_json(orient="table")
+        msg = "cannot pass both convert_axes and orient='table'"
+        with pytest.raises(ValueError, match=msg):
+            read_json(dfjson, orient="table", convert_axes=True)
+    @pytest.mark.parametrize(
+        "data, expected",
+        [
+            (
+                DataFrame([[1, 2], [4, 5]], columns=["a", "b"]),
+                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
+            ),
+            (
+                DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo"),
+                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
+            ),
+            (
+                DataFrame(
+                    [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
+                ),
+                {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
+            ),
+            (Series([1, 2, 3], name="A"), {"name": "A", "data": [1, 2, 3]}),
+            (
+                Series([1, 2, 3], name="A").rename_axis("foo"),
+                {"name": "A", "data": [1, 2, 3]},
+            ),
+            (
+                Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]]),
+                {"name": "A", "data": [1, 2]},
+            ),
+        ],
+    )
+    def test_index_false_to_json_split(self, data, expected):
+        # GH 17394
+        # Testing index=False in to_json with orient='split'
+        result = data.to_json(orient="split", index=False)
+        result = json.loads(result)
+        assert result == expected
+    @pytest.mark.parametrize(
+        "data",
+        [
+            (DataFrame([[1, 2], [4, 5]], columns=["a", "b"])),
+            (DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo")),
+            (
+                DataFrame(
+                    [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
+                )
+            ),
+            (Series([1, 2, 3], name="A")),
+            (Series([1, 2, 3], name="A").rename_axis("foo")),
+            (Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]])),
+        ],
+    )
+    def test_index_false_to_json_table(self, data):
+        # GH 17394
+        # Testing index=False in to_json with orient='table'
+        result = data.to_json(orient="table", index=False)
+        result = json.loads(result)
+        expected = {
+            "schema": pd.io.json.build_table_schema(data, index=False),
+            "data": DataFrame(data).to_dict(orient="records"),
+        }
+        assert result == expected
+    @pytest.mark.parametrize("orient", ["index", "columns"])
+    def test_index_false_error_to_json(self, orient):
+        # GH 17394, 25513
+        # Testing error message from to_json with index=False
+        df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
+        msg = (
+            "'index=False' is only valid when 'orient' is 'split', "
+            "'table', 'records', or 'values'"
+        )
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(orient=orient, index=False)
+    @pytest.mark.parametrize("orient", ["records", "values"])
+    def test_index_true_error_to_json(self, orient):
+        # GH 25513
+        # Testing error message from to_json with index=True
+        df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
+        msg = (
+            "'index=True' is only valid when 'orient' is 'split', "
+            "'table', 'index', or 'columns'"
+        )
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(orient=orient, index=True)
+    @pytest.mark.parametrize("orient", ["split", "table"])
+    @pytest.mark.parametrize("index", [True, False])
+    def test_index_false_from_json_to_json(self, orient, index):
+        # GH25170
+        # Test index=False in from_json to_json
+        expected = DataFrame({"a": [1, 2], "b": [3, 4]})
+        dfjson = expected.to_json(orient=orient, index=index)
+        result = read_json(StringIO(dfjson), orient=orient)
+        tm.assert_frame_equal(result, expected)
+    def test_read_timezone_information(self):
+        # GH 25546
+        result = read_json(
+            StringIO('{"2019-01-01T11:00:00.000Z":88}'), typ="series", orient="index"
+        )
+        exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[ns, UTC]")
+        expected = Series([88], index=exp_dti)
+        tm.assert_series_equal(result, expected)
+    @pytest.mark.parametrize(
+        "url",
+        [
+            "s3://example-fsspec/",
+            "gcs://another-fsspec/file.json",
+            "https://example-site.com/data",
+            "some-protocol://data.txt",
+        ],
+    )
+    def test_read_json_with_url_value(self, url):
+        # GH 36271
+        result = read_json(StringIO(f'{{"url":{{"0":"{url}"}}}}'))
+        expected = DataFrame({"url": [url]})
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize(
+        "compression",
+        ["", ".gz", ".bz2", ".tar"],
+    )
+    def test_read_json_with_very_long_file_path(self, compression):
+        # GH 46718
+        long_json_path = f'{"a" * 1000}.json{compression}'
+        with pytest.raises(
+            FileNotFoundError, match=f"File {long_json_path} does not exist"
+        ):
+            # path too long for Windows is handled in file_exists() but raises in
+            # _get_data_from_filepath()
+            read_json(long_json_path)
+    @pytest.mark.parametrize(
+        "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")]
+    )
+    def test_timedelta_as_label(self, date_format, key):
+        df = DataFrame([[1]], columns=[pd.Timedelta("1D")])
+        expected = f'{{"{key}":{{"0":1}}}}'
+        result = df.to_json(date_format=date_format)
+        assert result == expected
+    @pytest.mark.parametrize(
+        "orient,expected",
+        [
+            ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"),
+            ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"),
+            # TODO: the below have separate encoding procedures
+            pytest.param(
+                "split",
+                "",
+                marks=pytest.mark.xfail(
+                    reason="Produces JSON but not in a consistent manner"
+                ),
+            ),
+            pytest.param(
+                "table",
+                "",
+                marks=pytest.mark.xfail(
+                    reason="Produces JSON but not in a consistent manner"
+                ),
+            ),
+        ],
+    )
+    def test_tuple_labels(self, orient, expected):
+        # GH 20500
+        df = DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")])
+        result = df.to_json(orient=orient)
+        assert result == expected
+    @pytest.mark.parametrize("indent", [1, 2, 4])
+    def test_to_json_indent(self, indent):
+        """``indent`` sets the number of spaces used per nesting level."""
+        # GH 12004
+        df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
+        result = df.to_json(indent=indent)
+        # one indentation unit; second-level keys below are prefixed with two
+        spaces = " " * indent
+        expected = f"""{{
+{spaces}"a":{{
+{spaces}{spaces}"0":"foo",
+{spaces}{spaces}"1":"baz"
+{spaces}}},
+{spaces}"b":{{
+{spaces}{spaces}"0":"bar",
+{spaces}{spaces}"1":"qux"
+{spaces}}}
+}}"""
+        assert result == expected
+    @pytest.mark.skipif(
+        using_string_dtype(),
+        reason="Adjust expected when infer_string is default, no bug here, "
+        "just a complicated parametrization",
+    )
+    @pytest.mark.parametrize(
+        "orient,expected",
+        [
+            (
+                "split",
+                """{
+    "columns":[
+        "a",
+        "b"
+    ],
+    "index":[
+        0,
+        1
+    ],
+    "data":[
+        [
+            "foo",
+            "bar"
+        ],
+        [
+            "baz",
+            "qux"
+        ]
+    ]
+}""",
+            ),
+            (
+                "records",
+                """[
+    {
+        "a":"foo",
+        "b":"bar"
+    },
+    {
+        "a":"baz",
+        "b":"qux"
+    }
+]""",
+            ),
+            (
+                "index",
+                """{
+    "0":{
+        "a":"foo",
+        "b":"bar"
+    },
+    "1":{
+        "a":"baz",
+        "b":"qux"
+    }
+}""",
+            ),
+            (
+                "columns",
+                """{
+    "a":{
+        "0":"foo",
+        "1":"baz"
+    },
+    "b":{
+        "0":"bar",
+        "1":"qux"
+    }
+}""",
+            ),
+            (
+                "values",
+                """[
+    [
+        "foo",
+        "bar"
+    ],
+    [
+        "baz",
+        "qux"
+    ]
+]""",
+            ),
+            (
+                "table",
+                """{
+    "schema":{
+        "fields":[
+            {
+                "name":"index",
+                "type":"integer"
+            },
+            {
+                "name":"a",
+                "type":"string"
+            },
+            {
+                "name":"b",
+                "type":"string"
+            }
+        ],
+        "primaryKey":[
+            "index"
+        ],
+        "pandas_version":"1.4.0"
+    },
+    "data":[
+        {
+            "index":0,
+            "a":"foo",
+            "b":"bar"
+        },
+        {
+            "index":1,
+            "a":"baz",
+            "b":"qux"
+        }
+    ]
+}""",
+            ),
+        ],
+    )
+    def test_json_indent_all_orients(self, orient, expected):
+        """``indent=4`` output is compared character for character per orient."""
+        # GH 12004
+        df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
+        result = df.to_json(orient=orient, indent=4)
+        assert result == expected
+    def test_json_negative_indent_raises(self):
+        with pytest.raises(ValueError, match="must be a nonnegative integer"):
+            DataFrame().to_json(indent=-1)
+    def test_emca_262_nan_inf_support(self):
+        # GH 12213
+        data = StringIO(
+            '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
+        )
+        result = read_json(data)
+        expected = DataFrame(
+            ["a", None, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
+        )
+        tm.assert_frame_equal(result, expected)
+    def test_frame_int_overflow(self):
+        # GH 30320
+        encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}])
+        expected = DataFrame({"col": ["31900441201190696999", "Text"]})
+        result = read_json(StringIO(encoded_json))
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize(
+        "dataframe,expected",
+        [
+            (
+                DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}),
+                '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,'
+                '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}',
+            )
+        ],
+    )
+    def test_json_multiindex(self, dataframe, expected):
+        series = dataframe.stack(future_stack=True)
+        result = series.to_json(orient="index")
+        assert result == expected
+    @pytest.mark.single_cpu
+    def test_to_s3(self, s3_public_bucket, s3so):
+        # GH 28375
+        mock_bucket_name, target_file = s3_public_bucket.name, "test.json"
+        df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
+        df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
+        timeout = 5
+        while True:
+            if target_file in (obj.key for obj in s3_public_bucket.objects.all()):
+                break
+            time.sleep(0.1)
+            timeout -= 0.1
+            assert timeout > 0, "Timed out waiting for file to appear on moto"
+    def test_json_pandas_nulls(self, nulls_fixture, request):
+        # GH 31615
+        if isinstance(nulls_fixture, Decimal):
+            mark = pytest.mark.xfail(reason="not implemented")
+            request.applymarker(mark)
+        result = DataFrame([[nulls_fixture]]).to_json()
+        assert result == '{"0":{"0":null}}'
+    def test_readjson_bool_series(self):
+        # GH31464
+        result = read_json(StringIO("[true, true, false]"), typ="series")
+        expected = Series([True, True, False])
+        tm.assert_series_equal(result, expected)
+    def test_to_json_multiindex_escape(self):
+        # GH 15273
+        df = DataFrame(
+            True,
+            index=date_range("2017-01-20", "2017-01-23"),
+            columns=["foo", "bar"],
+        ).stack(future_stack=True)
+        result = df.to_json()
+        expected = (
+            "{\"(Timestamp('2017-01-20 00:00:00'), 'foo')\":true,"
+            "\"(Timestamp('2017-01-20 00:00:00'), 'bar')\":true,"
+            "\"(Timestamp('2017-01-21 00:00:00'), 'foo')\":true,"
+            "\"(Timestamp('2017-01-21 00:00:00'), 'bar')\":true,"
+            "\"(Timestamp('2017-01-22 00:00:00'), 'foo')\":true,"
+            "\"(Timestamp('2017-01-22 00:00:00'), 'bar')\":true,"
+            "\"(Timestamp('2017-01-23 00:00:00'), 'foo')\":true,"
+            "\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}"
+        )
+        assert result == expected
+    def test_to_json_series_of_objects(self):
+        class _TestObject:
+            def __init__(self, a, b, _c, d) -> None:
+                self.a = a
+                self.b = b
+                self._c = _c
+                self.d = d
+            def e(self):
+                return 5
+        # JSON keys should be all non-callable non-underscore attributes, see GH-42768
+        series = Series([_TestObject(a=1, b=2, _c=3, d=4)])
+        assert json.loads(series.to_json()) == {"0": {"a": 1, "b": 2, "d": 4}}
+    @pytest.mark.parametrize(
+        "data,expected",
+        [
+            (
+                Series({0: -6 + 8j, 1: 0 + 1j, 2: 9 - 5j}),
+                '{"0":{"imag":8.0,"real":-6.0},'
+                '"1":{"imag":1.0,"real":0.0},'
+                '"2":{"imag":-5.0,"real":9.0}}',
+            ),
+            (
+                Series({0: -9.39 + 0.66j, 1: 3.95 + 9.32j, 2: 4.03 - 0.17j}),
+                '{"0":{"imag":0.66,"real":-9.39},'
+                '"1":{"imag":9.32,"real":3.95},'
+                '"2":{"imag":-0.17,"real":4.03}}',
+            ),
+            (
+                DataFrame([[-2 + 3j, -1 - 0j], [4 - 3j, -0 - 10j]]),
+                '{"0":{"0":{"imag":3.0,"real":-2.0},'
+                '"1":{"imag":-3.0,"real":4.0}},'
+                '"1":{"0":{"imag":0.0,"real":-1.0},'
+                '"1":{"imag":-10.0,"real":0.0}}}',
+            ),
+            (
+                DataFrame(
+                    [[-0.28 + 0.34j, -1.08 - 0.39j], [0.41 - 0.34j, -0.78 - 1.35j]]
+                ),
+                '{"0":{"0":{"imag":0.34,"real":-0.28},'
+                '"1":{"imag":-0.34,"real":0.41}},'
+                '"1":{"0":{"imag":-0.39,"real":-1.08},'
+                '"1":{"imag":-1.35,"real":-0.78}}}',
+            ),
+        ],
+    )
+    def test_complex_data_tojson(self, data, expected):
+        # GH41174
+        result = data.to_json()
+        assert result == expected
+    def test_json_uint64(self):
+        # GH21073
+        expected = (
+            '{"columns":["col1"],"index":[0,1],'
+            '"data":[[13342205958987758245],[12388075603347835679]]}'
+        )
+        df = DataFrame(data={"col1": [13342205958987758245, 12388075603347835679]})
+        result = df.to_json(orient="split")
+        assert result == expected
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
+    def test_read_json_dtype_backend(
+        self, string_storage, dtype_backend, orient, using_infer_string
+    ):
+        # GH#50750
+        df = DataFrame(
+            {
+                "a": Series([1, np.nan, 3], dtype="Int64"),
+                "b": Series([1, 2, 3], dtype="Int64"),
+                "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
+                "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
+                "e": [True, False, None],
+                "f": [True, False, True],
+                "g": ["a", "b", "c"],
+                "h": ["a", "b", None],
+            }
+        )
+        out = df.to_json(orient=orient)
+        with pd.option_context("mode.string_storage", string_storage):
+            result = read_json(
+                StringIO(out), dtype_backend=dtype_backend, orient=orient
+            )
+        if dtype_backend == "pyarrow":
+            pa = pytest.importorskip("pyarrow")
+            string_dtype = pd.ArrowDtype(pa.string())
+        else:
+            string_dtype = pd.StringDtype(string_storage)
+        expected = DataFrame(
+            {
+                "a": Series([1, np.nan, 3], dtype="Int64"),
+                "b": Series([1, 2, 3], dtype="Int64"),
+                "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
+                "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
+                "e": Series([True, False, NA], dtype="boolean"),
+                "f": Series([True, False, True], dtype="boolean"),
+                "g": Series(["a", "b", "c"], dtype=string_dtype),
+                "h": Series(["a", "b", None], dtype=string_dtype),
+            }
+        )
+        if dtype_backend == "pyarrow":
+            pa = pytest.importorskip("pyarrow")
+            from pandas.arrays import ArrowExtensionArray
+            expected = DataFrame(
+                {
+                    col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
+                    for col in expected.columns
+                }
+            )
+        if orient == "values":
+            expected.columns = list(range(8))
+        # the storage of the str columns' Index is also affected by the
+        # string_storage setting -> ignore that for checking the result
+        tm.assert_frame_equal(result, expected, check_column_type=False)
+    @pytest.mark.parametrize("orient", ["split", "records", "index"])
+    def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
+        # GH#50750
+        pa = pytest.importorskip("pyarrow")
+        ser = Series([1, np.nan, 3], dtype="Int64")
+        out = ser.to_json(orient=orient)
+        with pd.option_context("mode.string_storage", string_storage):
+            result = read_json(
+                StringIO(out), dtype_backend=dtype_backend, orient=orient, typ="series"
+            )
+        expected = Series([1, np.nan, 3], dtype="Int64")
+        if dtype_backend == "pyarrow":
+            from pandas.arrays import ArrowExtensionArray
+            expected = Series(ArrowExtensionArray(pa.array(expected, from_pandas=True)))
+        tm.assert_series_equal(result, expected)
+    def test_invalid_dtype_backend(self):
+        msg = (
+            "dtype_backend numpy is invalid, only 'numpy_nullable' and "
+            "'pyarrow' are allowed."
+        )
+        with pytest.raises(ValueError, match=msg):
+            read_json("test", dtype_backend="numpy")
+def test_invalid_engine():
+    # GH 48893
+    ser = Series(range(1))
+    out = ser.to_json()
+    with pytest.raises(ValueError, match="The engine type foo"):
+        read_json(out, engine="foo")
+def test_pyarrow_engine_lines_false():
+    # GH 48893
+    ser = Series(range(1))
+    out = ser.to_json()
+    with pytest.raises(ValueError, match="currently pyarrow engine only supports"):
+        read_json(out, engine="pyarrow", lines=False)
+def test_json_roundtrip_string_inference(orient):
+    df = DataFrame(
+        [["a", "b"], ["c", "d"]], index=["row 1", "row 2"], columns=["col 1", "col 2"]
+    )
+    out = df.to_json()
+    with pd.option_context("future.infer_string", True):
+        result = read_json(StringIO(out))
+    dtype = pd.StringDtype(na_value=np.nan)
+    expected = DataFrame(
+        [["a", "b"], ["c", "d"]],
+        dtype=dtype,
+        index=Index(["row 1", "row 2"], dtype=dtype),
+        columns=Index(["col 1", "col 2"], dtype=dtype),
+    )
+    tm.assert_frame_equal(result, expected)
+def test_json_pos_args_deprecation():
+    # GH-54229
+    df = DataFrame({"a": [1, 2, 3]})
+    msg = (
+        r"Starting with pandas version 3.0 all arguments of to_json except for the "
+        r"argument 'path_or_buf' will be keyword-only."
+    )
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        buf = BytesIO()
+        df.to_json(buf, "split")
+@td.skip_if_no("pyarrow")
+def test_to_json_ea_null():
+    # GH#57224
+    df = DataFrame(
+        {
+            "a": Series([1, NA], dtype="int64[pyarrow]"),
+            "b": Series([2, NA], dtype="Int64"),
+        }
+    )
+    result = df.to_json(orient="records", lines=True)
+    expected = """{"a":1,"b":2}
+{"a":null,"b":null}
+"""
+    assert result == expected
+def test_read_json_lines_rangeindex():
+    # GH 57429
+    data = """
+{"a": 1, "b": 2}
+{"a": 3, "b": 4}
+"""
+    result = read_json(StringIO(data), lines=True).index
+    expected = RangeIndex(2)
+    tm.assert_index_equal(result, expected, exact=True)

py311/lib/python3.11/site-packages/pandas/tests/io/json/test_ujson.py ADDED Viewed

	@@ -0,0 +1,1087 @@

+import calendar
+import datetime
+import decimal
+import json
+import locale
+import math
+import re
+import time
+import dateutil
+import numpy as np
+import pytest
+import pytz
+import pandas._libs.json as ujson
+from pandas.compat import IS64
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    NaT,
+    PeriodIndex,
+    Series,
+    Timedelta,
+    Timestamp,
+    date_range,
+)
+import pandas._testing as tm
+def _clean_dict(d):
+    """
+    Sanitize dictionary for JSON by converting all keys to strings.
+    Parameters
+    ----------
+    d : dict
+        The dictionary to convert.
+    Returns
+    -------
+    cleaned_dict : dict
+    """
+    return {str(k): v for k, v in d.items()}
+@pytest.fixture(
+    params=[None, "split", "records", "values", "index"]  # Column indexed by default.
+)
+def orient(request):
+    return request.param
+class TestUltraJSONTests:
+    @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
+    def test_encode_decimal(self):
+        sut = decimal.Decimal("1337.1337")
+        encoded = ujson.ujson_dumps(sut, double_precision=15)
+        decoded = ujson.ujson_loads(encoded)
+        assert decoded == 1337.1337
+        sut = decimal.Decimal("0.95")
+        encoded = ujson.ujson_dumps(sut, double_precision=1)
+        assert encoded == "1.0"
+        decoded = ujson.ujson_loads(encoded)
+        assert decoded == 1.0
+        sut = decimal.Decimal("0.94")
+        encoded = ujson.ujson_dumps(sut, double_precision=1)
+        assert encoded == "0.9"
+        decoded = ujson.ujson_loads(encoded)
+        assert decoded == 0.9
+        sut = decimal.Decimal("1.95")
+        encoded = ujson.ujson_dumps(sut, double_precision=1)
+        assert encoded == "2.0"
+        decoded = ujson.ujson_loads(encoded)
+        assert decoded == 2.0
+        sut = decimal.Decimal("-1.95")
+        encoded = ujson.ujson_dumps(sut, double_precision=1)
+        assert encoded == "-2.0"
+        decoded = ujson.ujson_loads(encoded)
+        assert decoded == -2.0
+        sut = decimal.Decimal("0.995")
+        encoded = ujson.ujson_dumps(sut, double_precision=2)
+        assert encoded == "1.0"
+        decoded = ujson.ujson_loads(encoded)
+        assert decoded == 1.0
+        sut = decimal.Decimal("0.9995")
+        encoded = ujson.ujson_dumps(sut, double_precision=3)
+        assert encoded == "1.0"
+        decoded = ujson.ujson_loads(encoded)
+        assert decoded == 1.0
+        sut = decimal.Decimal("0.99999999999999944")
+        encoded = ujson.ujson_dumps(sut, double_precision=15)
+        assert encoded == "1.0"
+        decoded = ujson.ujson_loads(encoded)
+        assert decoded == 1.0
+    @pytest.mark.parametrize("ensure_ascii", [True, False])
+    def test_encode_string_conversion(self, ensure_ascii):
+        string_input = "A string \\ / \b \f \n \r \t </script> &"
+        not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"'
+        html_encoded = (
+            '"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"'
+        )
+        def helper(expected_output, **encode_kwargs):
+            output = ujson.ujson_dumps(
+                string_input, ensure_ascii=ensure_ascii, **encode_kwargs
+            )
+            assert output == expected_output
+            assert string_input == json.loads(output)
+            assert string_input == ujson.ujson_loads(output)
+        # Default behavior assumes encode_html_chars=False.
+        helper(not_html_encoded)
+        # Make sure explicit encode_html_chars=False works.
+        helper(not_html_encoded, encode_html_chars=False)
+        # Make sure explicit encode_html_chars=True does the encoding.
+        helper(html_encoded, encode_html_chars=True)
+    @pytest.mark.parametrize(
+        "long_number", [-4342969734183514, -12345678901234.56789012, -528656961.4399388]
+    )
+    def test_double_long_numbers(self, long_number):
+        sut = {"a": long_number}
+        encoded = ujson.ujson_dumps(sut, double_precision=15)
+        decoded = ujson.ujson_loads(encoded)
+        assert sut == decoded
+    def test_encode_non_c_locale(self):
+        lc_category = locale.LC_NUMERIC
+        # We just need one of these locales to work.
+        for new_locale in ("it_IT.UTF-8", "Italian_Italy"):
+            if tm.can_set_locale(new_locale, lc_category):
+                with tm.set_locale(new_locale, lc_category):
+                    assert ujson.ujson_loads(ujson.ujson_dumps(4.78e60)) == 4.78e60
+                    assert ujson.ujson_loads("4.78", precise_float=True) == 4.78
+                break
+    def test_decimal_decode_test_precise(self):
+        sut = {"a": 4.56}
+        encoded = ujson.ujson_dumps(sut)
+        decoded = ujson.ujson_loads(encoded, precise_float=True)
+        assert sut == decoded
+    def test_encode_double_tiny_exponential(self):
+        num = 1e-40
+        assert num == ujson.ujson_loads(ujson.ujson_dumps(num))
+        num = 1e-100
+        assert num == ujson.ujson_loads(ujson.ujson_dumps(num))
+        num = -1e-45
+        assert num == ujson.ujson_loads(ujson.ujson_dumps(num))
+        num = -1e-145
+        assert np.allclose(num, ujson.ujson_loads(ujson.ujson_dumps(num)))
+    @pytest.mark.parametrize("unicode_key", ["key1", "بن"])
+    def test_encode_dict_with_unicode_keys(self, unicode_key):
+        unicode_dict = {unicode_key: "value1"}
+        assert unicode_dict == ujson.ujson_loads(ujson.ujson_dumps(unicode_dict))
+    @pytest.mark.parametrize(
+        "double_input", [math.pi, -math.pi]  # Should work with negatives too.
+    )
+    def test_encode_double_conversion(self, double_input):
+        output = ujson.ujson_dumps(double_input)
+        assert round(double_input, 5) == round(json.loads(output), 5)
+        assert round(double_input, 5) == round(ujson.ujson_loads(output), 5)
+    def test_encode_with_decimal(self):
+        decimal_input = 1.0
+        output = ujson.ujson_dumps(decimal_input)
+        assert output == "1.0"
+    def test_encode_array_of_nested_arrays(self):
+        nested_input = [[[[]]]] * 20
+        output = ujson.ujson_dumps(nested_input)
+        assert nested_input == json.loads(output)
+        assert nested_input == ujson.ujson_loads(output)
+    def test_encode_array_of_doubles(self):
+        doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10
+        output = ujson.ujson_dumps(doubles_input)
+        assert doubles_input == json.loads(output)
+        assert doubles_input == ujson.ujson_loads(output)
+    def test_double_precision(self):
+        double_input = 30.012345678901234
+        output = ujson.ujson_dumps(double_input, double_precision=15)
+        assert double_input == json.loads(output)
+        assert double_input == ujson.ujson_loads(output)
+        for double_precision in (3, 9):
+            output = ujson.ujson_dumps(double_input, double_precision=double_precision)
+            rounded_input = round(double_input, double_precision)
+            assert rounded_input == json.loads(output)
+            assert rounded_input == ujson.ujson_loads(output)
+    @pytest.mark.parametrize(
+        "invalid_val",
+        [
+            20,
+            -1,
+            "9",
+            None,
+        ],
+    )
+    def test_invalid_double_precision(self, invalid_val):
+        double_input = 30.12345678901234567890
+        expected_exception = ValueError if isinstance(invalid_val, int) else TypeError
+        msg = (
+            r"Invalid value '.*' for option 'double_precision', max is '15'|"
+            r"an integer is required \(got type |"
+            r"object cannot be interpreted as an integer"
+        )
+        with pytest.raises(expected_exception, match=msg):
+            ujson.ujson_dumps(double_input, double_precision=invalid_val)
+    def test_encode_string_conversion2(self):
+        string_input = "A string \\ / \b \f \n \r \t"
+        output = ujson.ujson_dumps(string_input)
+        assert string_input == json.loads(output)
+        assert string_input == ujson.ujson_loads(output)
+        assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'
+    @pytest.mark.parametrize(
+        "unicode_input",
+        ["Räksmörgås اسامة بن محمد بن عوض بن لادن", "\xe6\x97\xa5\xd1\x88"],
+    )
+    def test_encode_unicode_conversion(self, unicode_input):
+        enc = ujson.ujson_dumps(unicode_input)
+        dec = ujson.ujson_loads(enc)
+        assert enc == json.dumps(unicode_input)
+        assert dec == json.loads(enc)
+    def test_encode_control_escaping(self):
+        escaped_input = "\x19"
+        enc = ujson.ujson_dumps(escaped_input)
+        dec = ujson.ujson_loads(enc)
+        assert escaped_input == dec
+        assert enc == json.dumps(escaped_input)
+    def test_encode_unicode_surrogate_pair(self):
+        surrogate_input = "\xf0\x90\x8d\x86"
+        enc = ujson.ujson_dumps(surrogate_input)
+        dec = ujson.ujson_loads(enc)
+        assert enc == json.dumps(surrogate_input)
+        assert dec == json.loads(enc)
+    def test_encode_unicode_4bytes_utf8(self):
+        four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL"
+        enc = ujson.ujson_dumps(four_bytes_input)
+        dec = ujson.ujson_loads(enc)
+        assert enc == json.dumps(four_bytes_input)
+        assert dec == json.loads(enc)
+    def test_encode_unicode_4bytes_utf8highest(self):
+        four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL"
+        enc = ujson.ujson_dumps(four_bytes_input)
+        dec = ujson.ujson_loads(enc)
+        assert enc == json.dumps(four_bytes_input)
+        assert dec == json.loads(enc)
+    def test_encode_unicode_error(self):
+        string = "'\udac0'"
+        msg = (
+            r"'utf-8' codec can't encode character '\\udac0' "
+            r"in position 1: surrogates not allowed"
+        )
+        with pytest.raises(UnicodeEncodeError, match=msg):
+            ujson.ujson_dumps([string])
+    def test_encode_array_in_array(self):
+        arr_in_arr_input = [[[[]]]]
+        output = ujson.ujson_dumps(arr_in_arr_input)
+        assert arr_in_arr_input == json.loads(output)
+        assert output == json.dumps(arr_in_arr_input)
+        assert arr_in_arr_input == ujson.ujson_loads(output)
+    @pytest.mark.parametrize(
+        "num_input",
+        [
+            31337,
+            -31337,  # Negative number.
+            -9223372036854775808,  # Large negative number.
+        ],
+    )
+    def test_encode_num_conversion(self, num_input):
+        output = ujson.ujson_dumps(num_input)
+        assert num_input == json.loads(output)
+        assert output == json.dumps(num_input)
+        assert num_input == ujson.ujson_loads(output)
+    def test_encode_list_conversion(self):
+        list_input = [1, 2, 3, 4]
+        output = ujson.ujson_dumps(list_input)
+        assert list_input == json.loads(output)
+        assert list_input == ujson.ujson_loads(output)
+    def test_encode_dict_conversion(self):
+        dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
+        output = ujson.ujson_dumps(dict_input)
+        assert dict_input == json.loads(output)
+        assert dict_input == ujson.ujson_loads(output)
+    @pytest.mark.parametrize("builtin_value", [None, True, False])
+    def test_encode_builtin_values_conversion(self, builtin_value):
+        output = ujson.ujson_dumps(builtin_value)
+        assert builtin_value == json.loads(output)
+        assert output == json.dumps(builtin_value)
+        assert builtin_value == ujson.ujson_loads(output)
+    def test_encode_datetime_conversion(self):
+        datetime_input = datetime.datetime.fromtimestamp(time.time())
+        output = ujson.ujson_dumps(datetime_input, date_unit="s")
+        expected = calendar.timegm(datetime_input.utctimetuple())
+        assert int(expected) == json.loads(output)
+        assert int(expected) == ujson.ujson_loads(output)
+    def test_encode_date_conversion(self):
+        date_input = datetime.date.fromtimestamp(time.time())
+        output = ujson.ujson_dumps(date_input, date_unit="s")
+        tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0)
+        expected = calendar.timegm(tup)
+        assert int(expected) == json.loads(output)
+        assert int(expected) == ujson.ujson_loads(output)
+    @pytest.mark.parametrize(
+        "test",
+        [datetime.time(), datetime.time(1, 2, 3), datetime.time(10, 12, 15, 343243)],
+    )
+    def test_encode_time_conversion_basic(self, test):
+        output = ujson.ujson_dumps(test)
+        expected = f'"{test.isoformat()}"'
+        assert expected == output
+    def test_encode_time_conversion_pytz(self):
+        # see gh-11473: to_json segfaults with timezone-aware datetimes
+        test = datetime.time(10, 12, 15, 343243, pytz.utc)
+        output = ujson.ujson_dumps(test)
+        expected = f'"{test.isoformat()}"'
+        assert expected == output
+    def test_encode_time_conversion_dateutil(self):
+        # see gh-11473: to_json segfaults with timezone-aware datetimes
+        test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
+        output = ujson.ujson_dumps(test)
+        expected = f'"{test.isoformat()}"'
+        assert expected == output
+    @pytest.mark.parametrize(
+        "decoded_input", [NaT, np.datetime64("NaT"), np.nan, np.inf, -np.inf]
+    )
+    def test_encode_as_null(self, decoded_input):
+        assert ujson.ujson_dumps(decoded_input) == "null", "Expected null"
+    def test_datetime_units(self):
+        val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
+        stamp = Timestamp(val).as_unit("ns")
+        roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit="s"))
+        assert roundtrip == stamp._value // 10**9
+        roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit="ms"))
+        assert roundtrip == stamp._value // 10**6
+        roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit="us"))
+        assert roundtrip == stamp._value // 10**3
+        roundtrip = ujson.ujson_loads(ujson.ujson_dumps(val, date_unit="ns"))
+        assert roundtrip == stamp._value
+        msg = "Invalid value 'foo' for option 'date_unit'"
+        with pytest.raises(ValueError, match=msg):
+            ujson.ujson_dumps(val, date_unit="foo")
+    def test_encode_to_utf8(self):
+        unencoded = "\xe6\x97\xa5\xd1\x88"
+        enc = ujson.ujson_dumps(unencoded, ensure_ascii=False)
+        dec = ujson.ujson_loads(enc)
+        assert enc == json.dumps(unencoded, ensure_ascii=False)
+        assert dec == json.loads(enc)
+    def test_decode_from_unicode(self):
+        unicode_input = '{"obj": 31337}'
+        dec1 = ujson.ujson_loads(unicode_input)
+        dec2 = ujson.ujson_loads(str(unicode_input))
+        assert dec1 == dec2
+    def test_encode_recursion_max(self):
+        # 8 is the max recursion depth
+        class O2:
+            member = 0
+        class O1:
+            member = 0
+        decoded_input = O1()
+        decoded_input.member = O2()
+        decoded_input.member.member = decoded_input
+        with pytest.raises(OverflowError, match="Maximum recursion level reached"):
+            ujson.ujson_dumps(decoded_input)
+    def test_decode_jibberish(self):
+        jibberish = "fdsa sda v9sa fdsa"
+        msg = "Unexpected character found when decoding 'false'"
+        with pytest.raises(ValueError, match=msg):
+            ujson.ujson_loads(jibberish)
+    @pytest.mark.parametrize(
+        "broken_json",
+        [
+            "[",  # Broken array start.
+            "{",  # Broken object start.
+            "]",  # Broken array end.
+            "}",  # Broken object end.
+        ],
+    )
+    def test_decode_broken_json(self, broken_json):
+        msg = "Expected object or value"
+        with pytest.raises(ValueError, match=msg):
+            ujson.ujson_loads(broken_json)
+    @pytest.mark.parametrize("too_big_char", ["[", "{"])
+    def test_decode_depth_too_big(self, too_big_char):
+        with pytest.raises(ValueError, match="Reached object decoding depth limit"):
+            ujson.ujson_loads(too_big_char * (1024 * 1024))
+    @pytest.mark.parametrize(
+        "bad_string",
+        [
+            '"TESTING',  # Unterminated.
+            '"TESTING\\"',  # Unterminated escape.
+            "tru",  # Broken True.
+            "fa",  # Broken False.
+            "n",  # Broken None.
+        ],
+    )
+    def test_decode_bad_string(self, bad_string):
+        msg = (
+            "Unexpected character found when decoding|"
+            "Unmatched ''\"' when when decoding 'string'"
+        )
+        with pytest.raises(ValueError, match=msg):
+            ujson.ujson_loads(bad_string)
+    @pytest.mark.parametrize(
+        "broken_json, err_msg",
+        [
+            (
+                '{{1337:""}}',
+                "Key name of object must be 'string' when decoding 'object'",
+            ),
+            ('{{"key":"}', "Unmatched ''\"' when when decoding 'string'"),
+            ("[[[true", "Unexpected character found when decoding array value (2)"),
+        ],
+    )
+    def test_decode_broken_json_leak(self, broken_json, err_msg):
+        for _ in range(1000):
+            with pytest.raises(ValueError, match=re.escape(err_msg)):
+                ujson.ujson_loads(broken_json)
+    @pytest.mark.parametrize(
+        "invalid_dict",
+        [
+            "{{{{31337}}}}",  # No key.
+            '{{{{"key":}}}}',  # No value.
+            '{{{{"key"}}}}',  # No colon or value.
+        ],
+    )
+    def test_decode_invalid_dict(self, invalid_dict):
+        msg = (
+            "Key name of object must be 'string' when decoding 'object'|"
+            "No ':' found when decoding object value|"
+            "Expected object or value"
+        )
+        with pytest.raises(ValueError, match=msg):
+            ujson.ujson_loads(invalid_dict)
+    @pytest.mark.parametrize(
+        "numeric_int_as_str", ["31337", "-31337"]  # Should work with negatives.
+    )
+    def test_decode_numeric_int(self, numeric_int_as_str):
+        assert int(numeric_int_as_str) == ujson.ujson_loads(numeric_int_as_str)
+    def test_encode_null_character(self):
+        wrapped_input = "31337 \x00 1337"
+        output = ujson.ujson_dumps(wrapped_input)
+        assert wrapped_input == json.loads(output)
+        assert output == json.dumps(wrapped_input)
+        assert wrapped_input == ujson.ujson_loads(output)
+        alone_input = "\x00"
+        output = ujson.ujson_dumps(alone_input)
+        assert alone_input == json.loads(output)
+        assert output == json.dumps(alone_input)
+        assert alone_input == ujson.ujson_loads(output)
+        assert '"  \\u0000\\r\\n "' == ujson.ujson_dumps("  \u0000\r\n ")
+    def test_decode_null_character(self):
+        wrapped_input = '"31337 \\u0000 31337"'
+        assert ujson.ujson_loads(wrapped_input) == json.loads(wrapped_input)
+    def test_encode_list_long_conversion(self):
+        long_input = [
+            9223372036854775807,
+            9223372036854775807,
+            9223372036854775807,
+            9223372036854775807,
+            9223372036854775807,
+            9223372036854775807,
+        ]
+        output = ujson.ujson_dumps(long_input)
+        assert long_input == json.loads(output)
+        assert long_input == ujson.ujson_loads(output)
+    @pytest.mark.parametrize("long_input", [9223372036854775807, 18446744073709551615])
+    def test_encode_long_conversion(self, long_input):
+        output = ujson.ujson_dumps(long_input)
+        assert long_input == json.loads(output)
+        assert output == json.dumps(long_input)
+        assert long_input == ujson.ujson_loads(output)
+    @pytest.mark.parametrize("bigNum", [2**64, -(2**63) - 1])
+    def test_dumps_ints_larger_than_maxsize(self, bigNum):
+        encoding = ujson.ujson_dumps(bigNum)
+        assert str(bigNum) == encoding
+        with pytest.raises(
+            ValueError,
+            match="Value is too big|Value is too small",
+        ):
+            assert ujson.ujson_loads(encoding) == bigNum
+    @pytest.mark.parametrize(
+        "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"]
+    )
+    def test_decode_numeric_int_exp(self, int_exp):
+        assert ujson.ujson_loads(int_exp) == json.loads(int_exp)
+    def test_loads_non_str_bytes_raises(self):
+        msg = "a bytes-like object is required, not 'NoneType'"
+        with pytest.raises(TypeError, match=msg):
+            ujson.ujson_loads(None)
+    @pytest.mark.parametrize("val", [3590016419, 2**31, 2**32, (2**32) - 1])
+    def test_decode_number_with_32bit_sign_bit(self, val):
+        # Test that numbers that fit within 32 bits but would have the
+        # sign bit set (2**31 <= x < 2**32) are decoded properly.
+        doc = f'{{"id": {val}}}'
+        assert ujson.ujson_loads(doc)["id"] == val
+    def test_encode_big_escape(self):
+        # Make sure no Exception is raised.
+        for _ in range(10):
+            base = "\u00e5".encode()
+            escape_input = base * 1024 * 1024 * 2
+            ujson.ujson_dumps(escape_input)
+    def test_decode_big_escape(self):
+        # Make sure no Exception is raised.
+        for _ in range(10):
+            base = "\u00e5".encode()
+            quote = b'"'
+            escape_input = quote + (base * 1024 * 1024 * 2) + quote
+            ujson.ujson_loads(escape_input)
+    def test_to_dict(self):
+        d = {"key": 31337}
+        class DictTest:
+            def toDict(self):
+                return d
+        o = DictTest()
+        output = ujson.ujson_dumps(o)
+        dec = ujson.ujson_loads(output)
+        assert dec == d
+    def test_default_handler(self):
+        class _TestObject:
+            def __init__(self, val) -> None:
+                self.val = val
+            @property
+            def recursive_attr(self):
+                return _TestObject("recursive_attr")
+            def __str__(self) -> str:
+                return str(self.val)
+        msg = "Maximum recursion level reached"
+        with pytest.raises(OverflowError, match=msg):
+            ujson.ujson_dumps(_TestObject("foo"))
+        assert '"foo"' == ujson.ujson_dumps(_TestObject("foo"), default_handler=str)
+        def my_handler(_):
+            return "foobar"
+        assert '"foobar"' == ujson.ujson_dumps(
+            _TestObject("foo"), default_handler=my_handler
+        )
+        def my_handler_raises(_):
+            raise TypeError("I raise for anything")
+        with pytest.raises(TypeError, match="I raise for anything"):
+            ujson.ujson_dumps(_TestObject("foo"), default_handler=my_handler_raises)
+        def my_int_handler(_):
+            return 42
+        assert (
+            ujson.ujson_loads(
+                ujson.ujson_dumps(_TestObject("foo"), default_handler=my_int_handler)
+            )
+            == 42
+        )
+        def my_obj_handler(_):
+            return datetime.datetime(2013, 2, 3)
+        assert ujson.ujson_loads(
+            ujson.ujson_dumps(datetime.datetime(2013, 2, 3))
+        ) == ujson.ujson_loads(
+            ujson.ujson_dumps(_TestObject("foo"), default_handler=my_obj_handler)
+        )
+        obj_list = [_TestObject("foo"), _TestObject("bar")]
+        assert json.loads(json.dumps(obj_list, default=str)) == ujson.ujson_loads(
+            ujson.ujson_dumps(obj_list, default_handler=str)
+        )
+    def test_encode_object(self):
+        class _TestObject:
+            def __init__(self, a, b, _c, d) -> None:
+                self.a = a
+                self.b = b
+                self._c = _c
+                self.d = d
+            def e(self):
+                return 5
+        # JSON keys should be all non-callable non-underscore attributes, see GH-42768
+        test_object = _TestObject(a=1, b=2, _c=3, d=4)
+        assert ujson.ujson_loads(ujson.ujson_dumps(test_object)) == {
+            "a": 1,
+            "b": 2,
+            "d": 4,
+        }
+    def test_ujson__name__(self):
+        # GH 52898
+        assert ujson.__name__ == "pandas._libs.json"
+class TestNumpyJSONTests:
+    @pytest.mark.parametrize("bool_input", [True, False])
+    def test_bool(self, bool_input):
+        b = bool(bool_input)
+        assert ujson.ujson_loads(ujson.ujson_dumps(b)) == b
+    def test_bool_array(self):
+        bool_array = np.array(
+            [True, False, True, True, False, True, False, False], dtype=bool
+        )
+        output = np.array(ujson.ujson_loads(ujson.ujson_dumps(bool_array)), dtype=bool)
+        tm.assert_numpy_array_equal(bool_array, output)
+    def test_int(self, any_int_numpy_dtype):
+        klass = np.dtype(any_int_numpy_dtype).type
+        num = klass(1)
+        assert klass(ujson.ujson_loads(ujson.ujson_dumps(num))) == num
+    def test_int_array(self, any_int_numpy_dtype):
+        arr = np.arange(100, dtype=int)
+        arr_input = arr.astype(any_int_numpy_dtype)
+        arr_output = np.array(
+            ujson.ujson_loads(ujson.ujson_dumps(arr_input)), dtype=any_int_numpy_dtype
+        )
+        tm.assert_numpy_array_equal(arr_input, arr_output)
+    def test_int_max(self, any_int_numpy_dtype):
+        if any_int_numpy_dtype in ("int64", "uint64") and not IS64:
+            pytest.skip("Cannot test 64-bit integer on 32-bit platform")
+        klass = np.dtype(any_int_numpy_dtype).type
+        # uint64 max will always overflow,
+        # as it's encoded to signed.
+        if any_int_numpy_dtype == "uint64":
+            num = np.iinfo("int64").max
+        else:
+            num = np.iinfo(any_int_numpy_dtype).max
+        assert klass(ujson.ujson_loads(ujson.ujson_dumps(num))) == num
+    def test_float(self, float_numpy_dtype):
+        klass = np.dtype(float_numpy_dtype).type
+        num = klass(256.2013)
+        assert klass(ujson.ujson_loads(ujson.ujson_dumps(num))) == num
+    def test_float_array(self, float_numpy_dtype):
+        arr = np.arange(12.5, 185.72, 1.7322, dtype=float)
+        float_input = arr.astype(float_numpy_dtype)
+        float_output = np.array(
+            ujson.ujson_loads(ujson.ujson_dumps(float_input, double_precision=15)),
+            dtype=float_numpy_dtype,
+        )
+        tm.assert_almost_equal(float_input, float_output)
+    def test_float_max(self, float_numpy_dtype):
+        klass = np.dtype(float_numpy_dtype).type
+        num = klass(np.finfo(float_numpy_dtype).max / 10)
+        tm.assert_almost_equal(
+            klass(ujson.ujson_loads(ujson.ujson_dumps(num, double_precision=15))), num
+        )
+    def test_array_basic(self):
+        arr = np.arange(96)
+        arr = arr.reshape((2, 2, 2, 2, 3, 2))
+        tm.assert_numpy_array_equal(
+            np.array(ujson.ujson_loads(ujson.ujson_dumps(arr))), arr
+        )
+    @pytest.mark.parametrize("shape", [(10, 10), (5, 5, 4), (100, 1)])
+    def test_array_reshaped(self, shape):
+        arr = np.arange(100)
+        arr = arr.reshape(shape)
+        tm.assert_numpy_array_equal(
+            np.array(ujson.ujson_loads(ujson.ujson_dumps(arr))), arr
+        )
+    def test_array_list(self):
+        arr_list = [
+            "a",
+            [],
+            {},
+            {},
+            [],
+            42,
+            97.8,
+            ["a", "b"],
+            {"key": "val"},
+        ]
+        arr = np.array(arr_list, dtype=object)
+        result = np.array(ujson.ujson_loads(ujson.ujson_dumps(arr)), dtype=object)
+        tm.assert_numpy_array_equal(result, arr)
+    def test_array_float(self):
+        dtype = np.float32
+        arr = np.arange(100.202, 200.202, 1, dtype=dtype)
+        arr = arr.reshape((5, 5, 4))
+        arr_out = np.array(ujson.ujson_loads(ujson.ujson_dumps(arr)), dtype=dtype)
+        tm.assert_almost_equal(arr, arr_out)
+    def test_0d_array(self):
+        # gh-18878
+        msg = re.escape(
+            "array(1) (numpy-scalar) is not JSON serializable at the moment"
+        )
+        with pytest.raises(TypeError, match=msg):
+            ujson.ujson_dumps(np.array(1))
+    def test_array_long_double(self):
+        msg = re.compile(
+            "1234.5.* \\(numpy-scalar\\) is not JSON serializable at the moment"
+        )
+        with pytest.raises(TypeError, match=msg):
+            ujson.ujson_dumps(np.longdouble(1234.5))
+class TestPandasJSONTests:
+    def test_dataframe(self, orient):
+        dtype = np.int64
+        df = DataFrame(
+            [[1, 2, 3], [4, 5, 6]],
+            index=["a", "b"],
+            columns=["x", "y", "z"],
+            dtype=dtype,
+        )
+        encode_kwargs = {} if orient is None else {"orient": orient}
+        assert (df.dtypes == dtype).all()
+        output = ujson.ujson_loads(ujson.ujson_dumps(df, **encode_kwargs))
+        assert (df.dtypes == dtype).all()
+        # Ensure proper DataFrame initialization.
+        if orient == "split":
+            dec = _clean_dict(output)
+            output = DataFrame(**dec)
+        else:
+            output = DataFrame(output)
+        # Corrections to enable DataFrame comparison.
+        if orient == "values":
+            df.columns = [0, 1, 2]
+            df.index = [0, 1]
+        elif orient == "records":
+            df.index = [0, 1]
+        elif orient == "index":
+            df = df.transpose()
+        assert (df.dtypes == dtype).all()
+        tm.assert_frame_equal(output, df)
+    def test_dataframe_nested(self, orient):
+        df = DataFrame(
+            [[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"]
+        )
+        nested = {"df1": df, "df2": df.copy()}
+        kwargs = {} if orient is None else {"orient": orient}
+        exp = {
+            "df1": ujson.ujson_loads(ujson.ujson_dumps(df, **kwargs)),
+            "df2": ujson.ujson_loads(ujson.ujson_dumps(df, **kwargs)),
+        }
+        assert ujson.ujson_loads(ujson.ujson_dumps(nested, **kwargs)) == exp
+    def test_series(self, orient):
+        dtype = np.int64
+        s = Series(
+            [10, 20, 30, 40, 50, 60],
+            name="series",
+            index=[6, 7, 8, 9, 10, 15],
+            dtype=dtype,
+        ).sort_values()
+        assert s.dtype == dtype
+        encode_kwargs = {} if orient is None else {"orient": orient}
+        output = ujson.ujson_loads(ujson.ujson_dumps(s, **encode_kwargs))
+        assert s.dtype == dtype
+        if orient == "split":
+            dec = _clean_dict(output)
+            output = Series(**dec)
+        else:
+            output = Series(output)
+        if orient in (None, "index"):
+            s.name = None
+            output = output.sort_values()
+            s.index = ["6", "7", "8", "9", "10", "15"]
+        elif orient in ("records", "values"):
+            s.name = None
+            s.index = [0, 1, 2, 3, 4, 5]
+        assert s.dtype == dtype
+        tm.assert_series_equal(output, s)
+    def test_series_nested(self, orient):
+        s = Series(
+            [10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]
+        ).sort_values()
+        nested = {"s1": s, "s2": s.copy()}
+        kwargs = {} if orient is None else {"orient": orient}
+        exp = {
+            "s1": ujson.ujson_loads(ujson.ujson_dumps(s, **kwargs)),
+            "s2": ujson.ujson_loads(ujson.ujson_dumps(s, **kwargs)),
+        }
+        assert ujson.ujson_loads(ujson.ujson_dumps(nested, **kwargs)) == exp
+    def test_index(self):
+        i = Index([23, 45, 18, 98, 43, 11], name="index")
+        # Column indexed.
+        output = Index(ujson.ujson_loads(ujson.ujson_dumps(i)), name="index")
+        tm.assert_index_equal(i, output)
+        dec = _clean_dict(ujson.ujson_loads(ujson.ujson_dumps(i, orient="split")))
+        output = Index(**dec)
+        tm.assert_index_equal(i, output)
+        assert i.name == output.name
+        tm.assert_index_equal(i, output)
+        assert i.name == output.name
+        output = Index(
+            ujson.ujson_loads(ujson.ujson_dumps(i, orient="values")), name="index"
+        )
+        tm.assert_index_equal(i, output)
+        output = Index(
+            ujson.ujson_loads(ujson.ujson_dumps(i, orient="records")), name="index"
+        )
+        tm.assert_index_equal(i, output)
+        output = Index(
+            ujson.ujson_loads(ujson.ujson_dumps(i, orient="index")), name="index"
+        )
+        tm.assert_index_equal(i, output)
+    def test_datetime_index(self):
+        date_unit = "ns"
+        # freq doesn't round-trip
+        rng = DatetimeIndex(list(date_range("1/1/2000", periods=20)), freq=None)
+        encoded = ujson.ujson_dumps(rng, date_unit=date_unit)
+        decoded = DatetimeIndex(np.array(ujson.ujson_loads(encoded)))
+        tm.assert_index_equal(rng, decoded)
+        ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
+        decoded = Series(ujson.ujson_loads(ujson.ujson_dumps(ts, date_unit=date_unit)))
+        idx_values = decoded.index.values.astype(np.int64)
+        decoded.index = DatetimeIndex(idx_values)
+        tm.assert_series_equal(ts, decoded)
+    @pytest.mark.parametrize(
+        "invalid_arr",
+        [
+            "[31337,]",  # Trailing comma.
+            "[,31337]",  # Leading comma.
+            "[]]",  # Unmatched bracket.
+            "[,]",  # Only comma.
+        ],
+    )
+    def test_decode_invalid_array(self, invalid_arr):
+        msg = (
+            "Expected object or value|Trailing data|"
+            "Unexpected character found when decoding array value"
+        )
+        with pytest.raises(ValueError, match=msg):
+            ujson.ujson_loads(invalid_arr)
+    @pytest.mark.parametrize("arr", [[], [31337]])
+    def test_decode_array(self, arr):
+        assert arr == ujson.ujson_loads(str(arr))
+    @pytest.mark.parametrize("extreme_num", [9223372036854775807, -9223372036854775808])
+    def test_decode_extreme_numbers(self, extreme_num):
+        assert extreme_num == ujson.ujson_loads(str(extreme_num))
+    @pytest.mark.parametrize("too_extreme_num", [f"{2**64}", f"{-2**63-1}"])
+    def test_decode_too_extreme_numbers(self, too_extreme_num):
+        with pytest.raises(
+            ValueError,
+            match="Value is too big|Value is too small",
+        ):
+            ujson.ujson_loads(too_extreme_num)
+    def test_decode_with_trailing_whitespaces(self):
+        assert {} == ujson.ujson_loads("{}\n\t ")
+    def test_decode_with_trailing_non_whitespaces(self):
+        with pytest.raises(ValueError, match="Trailing data"):
+            ujson.ujson_loads("{}\n\t a")
+    @pytest.mark.parametrize("value", [f"{2**64}", f"{-2**63-1}"])
+    def test_decode_array_with_big_int(self, value):
+        with pytest.raises(
+            ValueError,
+            match="Value is too big|Value is too small",
+        ):
+            ujson.ujson_loads(value)
+    @pytest.mark.parametrize(
+        "float_number",
+        [
+            1.1234567893,
+            1.234567893,
+            1.34567893,
+            1.4567893,
+            1.567893,
+            1.67893,
+            1.7893,
+            1.893,
+            1.3,
+        ],
+    )
+    @pytest.mark.parametrize("sign", [-1, 1])
+    def test_decode_floating_point(self, sign, float_number):
+        float_number *= sign
+        tm.assert_almost_equal(
+            float_number, ujson.ujson_loads(str(float_number)), rtol=1e-15
+        )
+    def test_encode_big_set(self):
+        s = set()
+        for x in range(100000):
+            s.add(x)
+        # Make sure no Exception is raised.
+        ujson.ujson_dumps(s)
+    def test_encode_empty_set(self):
+        assert "[]" == ujson.ujson_dumps(set())
+    def test_encode_set(self):
+        s = {1, 2, 3, 4, 5, 6, 7, 8, 9}
+        enc = ujson.ujson_dumps(s)
+        dec = ujson.ujson_loads(enc)
+        for v in dec:
+            assert v in s
+    @pytest.mark.parametrize(
+        "td",
+        [
+            Timedelta(days=366),
+            Timedelta(days=-1),
+            Timedelta(hours=13, minutes=5, seconds=5),
+            Timedelta(hours=13, minutes=20, seconds=30),
+            Timedelta(days=-1, nanoseconds=5),
+            Timedelta(nanoseconds=1),
+            Timedelta(microseconds=1, nanoseconds=1),
+            Timedelta(milliseconds=1, microseconds=1, nanoseconds=1),
+            Timedelta(milliseconds=999, microseconds=999, nanoseconds=999),
+        ],
+    )
+    def test_encode_timedelta_iso(self, td):
+        # GH 28256
+        result = ujson.ujson_dumps(td, iso_dates=True)
+        expected = f'"{td.isoformat()}"'
+        assert result == expected
+    def test_encode_periodindex(self):
+        # GH 46683
+        p = PeriodIndex(["2022-04-06", "2022-04-07"], freq="D")
+        df = DataFrame(index=p)
+        assert df.to_json() == "{}"

py311/lib/python3.11/site-packages/pandas/tests/io/parser/test_concatenate_chunks.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import numpy as np
+import pytest
+from pandas.errors import DtypeWarning
+import pandas._testing as tm
+from pandas.core.arrays import ArrowExtensionArray
+from pandas.io.parsers.c_parser_wrapper import _concatenate_chunks
+def test_concatenate_chunks_pyarrow():
+    # GH#51876
+    pa = pytest.importorskip("pyarrow")
+    chunks = [
+        {0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
+        {0: ArrowExtensionArray(pa.array([1, 2]))},
+    ]
+    result = _concatenate_chunks(chunks)
+    expected = ArrowExtensionArray(pa.array([1.5, 2.5, 1.0, 2.0]))
+    tm.assert_extension_array_equal(result[0], expected)
+def test_concatenate_chunks_pyarrow_strings():
+    # GH#51876
+    pa = pytest.importorskip("pyarrow")
+    chunks = [
+        {0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
+        {0: ArrowExtensionArray(pa.array(["a", "b"]))},
+    ]
+    with tm.assert_produces_warning(DtypeWarning, match="have mixed types"):
+        result = _concatenate_chunks(chunks)
+    expected = np.concatenate(
+        [np.array([1.5, 2.5], dtype=object), np.array(["a", "b"])]
+    )
+    tm.assert_numpy_array_equal(result[0], expected)

py311/lib/python3.11/site-packages/pandas/tests/io/pytables/test_categorical.py ADDED Viewed

	@@ -0,0 +1,214 @@

+import numpy as np
+import pytest
+from pandas import (
+    Categorical,
+    DataFrame,
+    Series,
+    _testing as tm,
+    concat,
+    read_hdf,
+)
+from pandas.tests.io.pytables.common import (
+    _maybe_remove,
+    ensure_clean_store,
+)
+# Apply the ``single_cpu`` marker to every test in this module.
+pytestmark = [pytest.mark.single_cpu]
+def test_categorical(setup_path):
+    """Append categorical Series/DataFrames to an HDFStore and select them back."""
+    # Also covers the stored metadata nodes, ``where`` queries on a categorical
+    # data column, appends with matching and mismatched categories, and
+    # removal of the metadata together with the table.
+    with ensure_clean_store(setup_path) as store:
+        # Basic
+        _maybe_remove(store, "s")
+        s = Series(
+            Categorical(
+                ["a", "b", "b", "a", "a", "c"],
+                categories=["a", "b", "c", "d"],
+                ordered=False,
+            )
+        )
+        store.append("s", s, format="table")
+        result = store.select("s")
+        tm.assert_series_equal(s, result)
+        _maybe_remove(store, "s_ordered")
+        s = Series(
+            Categorical(
+                ["a", "b", "b", "a", "a", "c"],
+                categories=["a", "b", "c", "d"],
+                ordered=True,
+            )
+        )
+        store.append("s_ordered", s, format="table")
+        result = store.select("s_ordered")
+        tm.assert_series_equal(s, result)
+        # ``df`` is built from the ordered categorical above and is reused by
+        # the query and append sections further down.
+        _maybe_remove(store, "df")
+        df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
+        store.append("df", df, format="table")
+        result = store.select("df")
+        tm.assert_frame_equal(result, df)
+        # Dtypes
+        _maybe_remove(store, "si")
+        s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")
+        store.append("si", s)
+        result = store.select("si")
+        tm.assert_series_equal(result, s)
+        _maybe_remove(store, "si2")
+        s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")
+        store.append("si2", s)
+        result = store.select("si2")
+        tm.assert_series_equal(result, s)
+        # Multiple
+        _maybe_remove(store, "df2")
+        df2 = df.copy()
+        df2["s2"] = Series(list("abcdefg")).astype("category")
+        store.append("df2", df2)
+        result = store.select("df2")
+        tm.assert_frame_equal(result, df2)
+        # Make sure the metadata is OK
+        info = store.info()
+        assert "/df2   " in info
+        # df2._mgr.blocks[0] and df2._mgr.blocks[2] are Categorical
+        assert "/df2/meta/values_block_0/meta" in info
+        assert "/df2/meta/values_block_2/meta" in info
+        # unordered
+        _maybe_remove(store, "s2")
+        s = Series(
+            Categorical(
+                ["a", "b", "b", "a", "a", "c"],
+                categories=["a", "b", "c", "d"],
+                ordered=False,
+            )
+        )
+        store.append("s2", s, format="table")
+        result = store.select("s2")
+        tm.assert_series_equal(result, s)
+        # Query
+        _maybe_remove(store, "df3")
+        store.append("df3", df, data_columns=["s"])
+        expected = df[df.s.isin(["b", "c"])]
+        result = store.select("df3", where=['s in ["b","c"]'])
+        tm.assert_frame_equal(result, expected)
+        expected = df[df.s.isin(["b", "c"])]
+        result = store.select("df3", where=['s = ["b","c"]'])
+        tm.assert_frame_equal(result, expected)
+        # "d" is a declared category of df.s that no row uses.
+        expected = df[df.s.isin(["d"])]
+        result = store.select("df3", where=['s in ["d"]'])
+        tm.assert_frame_equal(result, expected)
+        # "f" is not among the categories of df.s at all.
+        expected = df[df.s.isin(["f"])]
+        result = store.select("df3", where=['s in ["f"]'])
+        tm.assert_frame_equal(result, expected)
+        # Appending with same categories is ok
+        store.append("df3", df)
+        df = concat([df, df])
+        expected = df[df.s.isin(["b", "c"])]
+        result = store.select("df3", where=['s in ["b","c"]'])
+        tm.assert_frame_equal(result, expected)
+        # Appending must have the same categories
+        df3 = df.copy()
+        df3["s"] = df3["s"].cat.remove_unused_categories()
+        msg = "cannot append a categorical with different categories to the existing"
+        with pytest.raises(ValueError, match=msg):
+            store.append("df3", df3)
+        # Remove, and make sure the metadata is removed too (it's a recursive
+        # removal, so it should be).
+        result = store.select("df3/meta/s/meta")
+        assert result is not None
+        store.remove("df3")
+        with pytest.raises(
+            KeyError, match="'No object named df3/meta/s/meta in the file'"
+        ):
+            store.select("df3/meta/s/meta")
+def test_categorical_conversion(tmp_path, setup_path):
+    # GH13322
+    # Check that read_hdf with categorical columns doesn't return rows if
+    # where criteria isn't met.
+    obsids = ["ESP_012345_6789", "ESP_987654_3210"]
+    imgids = ["APF00006np", "APF0001imm"]
+    data = [4.3, 9.8]
+    # Test without categories
+    df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data})
+    # We are expecting an empty DataFrame matching types of df
+    expected = df.iloc[[], :]
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="df", format="table", data_columns=True)
+    result = read_hdf(path, "df", where="obsids=B")
+    tm.assert_frame_equal(result, expected)
+    # Test with categories
+    df.obsids = df.obsids.astype("category")
+    df.imgids = df.imgids.astype("category")
+    # We are expecting an empty DataFrame matching types of df
+    expected = df.iloc[[], :]
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="df", format="table", data_columns=True)
+    result = read_hdf(path, "df", where="obsids=B")
+    tm.assert_frame_equal(result, expected)
+def test_categorical_nan_only_columns(tmp_path, setup_path):
+    """Categorical columns holding only NaN survive a ``to_hdf``/``read_hdf`` trip."""
+    # GH18413
+    # Check that read_hdf with categorical columns with NaN-only values can
+    # be read back.
+    df = DataFrame(
+        {
+            "a": ["a", "b", "c", np.nan],
+            "b": [np.nan, np.nan, np.nan, np.nan],
+            "c": [1, 2, 3, 4],
+            "d": Series([None] * 4, dtype=object),
+        }
+    )
+    df["a"] = df.a.astype("category")
+    df["b"] = df.b.astype("category")
+    # NOTE(review): "d" is overwritten with a categorical built from column "b",
+    # not from the object-dtype "d" defined above - confirm this is intended.
+    df["d"] = df.b.astype("category")
+    expected = df
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="df", format="table", data_columns=True)
+    result = read_hdf(path, "df")
+    tm.assert_frame_equal(result, expected)
+@pytest.mark.parametrize(
+    "where, df, expected",
+    [
+        ('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})),
+        ('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})),
+    ],
+)
+def test_convert_value(
+    tmp_path, setup_path, where: str, df: DataFrame, expected: DataFrame
+):
+    # GH39420
+    # Check that read_hdf with categorical columns can filter by where condition.
+    df.col = df.col.astype("category")
+    max_widths = {"col": 1}
+    categorical_values = sorted(df.col.unique())
+    expected.col = expected.col.astype("category")
+    expected.col = expected.col.cat.set_categories(categorical_values)
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="df", format="table", min_itemsize=max_widths)
+    result = read_hdf(path, where=where)
+    tm.assert_frame_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/io/pytables/test_read.py ADDED Viewed

	@@ -0,0 +1,417 @@

+from contextlib import closing
+from pathlib import Path
+import re
+import numpy as np
+import pytest
+from pandas._libs.tslibs import Timestamp
+from pandas.compat import is_platform_windows
+import pandas as pd
+from pandas import (
+    DataFrame,
+    HDFStore,
+    Index,
+    Series,
+    _testing as tm,
+    date_range,
+    read_hdf,
+)
+from pandas.tests.io.pytables.common import (
+    _maybe_remove,
+    ensure_clean_store,
+)
+from pandas.util import _test_decorators as td
+from pandas.io.pytables import TableIterator
+# Apply the ``single_cpu`` marker to every test in this module.
+pytestmark = [pytest.mark.single_cpu]
+def test_read_missing_key_close_store(tmp_path, setup_path):
+    # GH 25766
+    path = tmp_path / setup_path
+    df = DataFrame({"a": range(2), "b": range(2)})
+    df.to_hdf(path, key="k1")
+    with pytest.raises(KeyError, match="'No object named k2 in the file'"):
+        read_hdf(path, "k2")
+    # smoke test to test that file is properly closed after
+    # read with KeyError before another write
+    df.to_hdf(path, key="k2")
+def test_read_index_error_close_store(tmp_path, setup_path):
+    # GH 25766
+    path = tmp_path / setup_path
+    df = DataFrame({"A": [], "B": []}, index=[])
+    df.to_hdf(path, key="k1")
+    with pytest.raises(IndexError, match=r"list index out of range"):
+        read_hdf(path, "k1", stop=0)
+    # smoke test to test that file is properly closed after
+    # read with IndexError before another write
+    df.to_hdf(path, key="k1")
+def test_read_missing_key_opened_store(tmp_path, setup_path):
+    # GH 28699
+    path = tmp_path / setup_path
+    df = DataFrame({"a": range(2), "b": range(2)})
+    df.to_hdf(path, key="k1")
+    with HDFStore(path, "r") as store:
+        with pytest.raises(KeyError, match="'No object named k2 in the file'"):
+            read_hdf(store, "k2")
+        # Test that the file is still open after a KeyError and that we can
+        # still read from it.
+        read_hdf(store, "k1")
+def test_read_column(setup_path):
+    """Exercise ``HDFStore.select_column``: error paths, data columns, start/stop."""
+    df = DataFrame(
+        np.random.default_rng(2).standard_normal((10, 4)),
+        columns=Index(list("ABCD")),
+        index=date_range("2000-01-01", periods=10, freq="B"),
+    )
+    with ensure_clean_store(setup_path) as store:
+        _maybe_remove(store, "df")
+        # GH 17912
+        # HDFStore.select_column should raise a KeyError
+        # exception if the key is not a valid store
+        with pytest.raises(KeyError, match="No object named df in the file"):
+            store.select_column("df", "index")
+        store.append("df", df)
+        # error
+        with pytest.raises(
+            KeyError, match=re.escape("'column [foo] not found in the table'")
+        ):
+            store.select_column("df", "foo")
+        # select_column does not accept a ``where`` keyword
+        msg = re.escape("select_column() got an unexpected keyword argument 'where'")
+        with pytest.raises(TypeError, match=msg):
+            store.select_column("df", "index", where=["index>5"])
+        # valid
+        result = store.select_column("df", "index")
+        tm.assert_almost_equal(result.values, Series(df.index).values)
+        assert isinstance(result, Series)
+        # not a data indexable column
+        msg = re.escape(
+            "column [values_block_0] can not be extracted individually; "
+            "it is not data indexable"
+        )
+        with pytest.raises(ValueError, match=msg):
+            store.select_column("df", "values_block_0")
+        # a data column
+        df2 = df.copy()
+        df2["string"] = "foo"
+        store.append("df2", df2, data_columns=["string"])
+        result = store.select_column("df2", "string")
+        tm.assert_almost_equal(result.values, df2["string"].values)
+        # a data column with NaNs; the expected values below are the full
+        # column, NaN rows included
+        df3 = df.copy()
+        df3["string"] = "foo"
+        df3.loc[df3.index[4:6], "string"] = np.nan
+        store.append("df3", df3, data_columns=["string"])
+        result = store.select_column("df3", "string")
+        tm.assert_almost_equal(result.values, df3["string"].values)
+        # start/stop
+        result = store.select_column("df3", "string", start=2)
+        tm.assert_almost_equal(result.values, df3["string"].values[2:])
+        result = store.select_column("df3", "string", start=-2)
+        tm.assert_almost_equal(result.values, df3["string"].values[-2:])
+        result = store.select_column("df3", "string", stop=2)
+        tm.assert_almost_equal(result.values, df3["string"].values[:2])
+        result = store.select_column("df3", "string", stop=-2)
+        tm.assert_almost_equal(result.values, df3["string"].values[:-2])
+        result = store.select_column("df3", "string", start=2, stop=-2)
+        tm.assert_almost_equal(result.values, df3["string"].values[2:-2])
+        # for this 10-row column the slice [-2:2] is empty
+        result = store.select_column("df3", "string", start=-2, stop=2)
+        tm.assert_almost_equal(result.values, df3["string"].values[-2:2])
+        # GH 10392 - make sure column name is preserved
+        df4 = DataFrame({"A": np.random.default_rng(2).standard_normal(10), "B": "foo"})
+        store.append("df4", df4, data_columns=True)
+        expected = df4["B"]
+        result = store.select_column("df4", "B")
+        tm.assert_series_equal(result, expected)
+def test_pytables_native_read(datapath):
+    with ensure_clean_store(
+        datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"
+    ) as store:
+        d2 = store["detector/readout"]
+    assert isinstance(d2, DataFrame)
+@pytest.mark.skipif(is_platform_windows(), reason="native2 read fails oddly on windows")
+def test_pytables_native2_read(datapath):
+    with ensure_clean_store(
+        datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r"
+    ) as store:
+        str(store)
+        d1 = store["detector"]
+    assert isinstance(d1, DataFrame)
+def test_legacy_table_fixed_format_read_py2(datapath):
+    # GH 24510
+    # legacy table with fixed format written in Python 2
+    with ensure_clean_store(
+        datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r"
+    ) as store:
+        result = store.select("df")
+    expected = DataFrame(
+        [[1, 2, 3, "D"]],
+        columns=["A", "B", "C", "D"],
+        index=Index(["ABC"], name="INDEX_NAME"),
+    )
+    tm.assert_frame_equal(expected, result)
+def test_legacy_table_fixed_format_read_datetime_py2(datapath):
+    # GH 31750
+    # legacy table with fixed format and datetime64 column written in Python 2
+    expected = DataFrame(
+        [[Timestamp("2020-02-06T18:00")]],
+        columns=["A"],
+        index=Index(["date"]),
+        dtype="M8[ns]",
+    )
+    with ensure_clean_store(
+        datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"),
+        mode="r",
+    ) as store:
+        result = store.select("df")
+    tm.assert_frame_equal(expected, result)
+def test_legacy_table_read_py2(datapath):
+    # issue: 24925
+    # legacy table written in Python 2
+    with ensure_clean_store(
+        datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r"
+    ) as store:
+        result = store.select("table")
+    expected = DataFrame({"a": ["a", "b"], "b": [2, 3]})
+    tm.assert_frame_equal(expected, result)
+def test_read_hdf_open_store(tmp_path, setup_path, using_infer_string):
+    # GH10330
+    # No check for non-string path_or-buf, and no test of open store
+    df = DataFrame(
+        np.random.default_rng(2).random((4, 5)),
+        index=list("abcd"),
+        columns=list("ABCDE"),
+    )
+    df.index.name = "letters"
+    df = df.set_index(keys="E", append=True)
+    path = tmp_path / setup_path
+    if using_infer_string:
+        # TODO(infer_string) make this work for string dtype
+        msg = "Saving a MultiIndex with an extension dtype is not supported."
+        with pytest.raises(NotImplementedError, match=msg):
+            df.to_hdf(path, key="df", mode="w")
+        return
+    df.to_hdf(path, key="df", mode="w")
+    direct = read_hdf(path, "df")
+    with HDFStore(path, mode="r") as store:
+        indirect = read_hdf(store, "df")
+        tm.assert_frame_equal(direct, indirect)
+        assert store.is_open
+def test_read_hdf_index_not_view(tmp_path, setup_path):
+    # GH 37441
+    # Ensure that the index of the DataFrame is not a view
+    # into the original recarray that pytables reads in
+    df = DataFrame(
+        np.random.default_rng(2).random((4, 5)),
+        index=[0, 1, 2, 3],
+        columns=list("ABCDE"),
+    )
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="df", mode="w", format="table")
+    df2 = read_hdf(path, "df")
+    assert df2.index._data.base is None
+    tm.assert_frame_equal(df, df2)
+def test_read_hdf_iterator(tmp_path, setup_path):
+    df = DataFrame(
+        np.random.default_rng(2).random((4, 5)),
+        index=list("abcd"),
+        columns=list("ABCDE"),
+    )
+    df.index.name = "letters"
+    df = df.set_index(keys="E", append=True)
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="df", mode="w", format="t")
+    direct = read_hdf(path, "df")
+    iterator = read_hdf(path, "df", iterator=True)
+    with closing(iterator.store):
+        assert isinstance(iterator, TableIterator)
+        indirect = next(iterator.__iter__())
+    tm.assert_frame_equal(direct, indirect)
+def test_read_nokey(tmp_path, setup_path):
+    # GH10443
+    df = DataFrame(
+        np.random.default_rng(2).random((4, 5)),
+        index=list("abcd"),
+        columns=list("ABCDE"),
+    )
+    # Categorical dtype not supported for "fixed" format. So no need
+    # to test with that dtype in the dataframe here.
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="df", mode="a")
+    reread = read_hdf(path)
+    tm.assert_frame_equal(df, reread)
+    df.to_hdf(path, key="df2", mode="a")
+    msg = "key must be provided when HDF5 file contains multiple datasets."
+    with pytest.raises(ValueError, match=msg):
+        read_hdf(path)
+def test_read_nokey_table(tmp_path, setup_path):
+    # GH13231
+    df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="df", mode="a", format="table")
+    reread = read_hdf(path)
+    tm.assert_frame_equal(df, reread)
+    df.to_hdf(path, key="df2", mode="a", format="table")
+    msg = "key must be provided when HDF5 file contains multiple datasets."
+    with pytest.raises(ValueError, match=msg):
+        read_hdf(path)
+def test_read_nokey_empty(tmp_path, setup_path):
+    path = tmp_path / setup_path
+    store = HDFStore(path)
+    store.close()
+    msg = re.escape(
+        "Dataset(s) incompatible with Pandas data types, not table, or no "
+        "datasets found in HDF5 file."
+    )
+    with pytest.raises(ValueError, match=msg):
+        read_hdf(path)
+def test_read_from_pathlib_path(tmp_path, setup_path):
+    # GH11773
+    expected = DataFrame(
+        np.random.default_rng(2).random((4, 5)),
+        index=list("abcd"),
+        columns=list("ABCDE"),
+    )
+    filename = tmp_path / setup_path
+    path_obj = Path(filename)
+    expected.to_hdf(path_obj, key="df", mode="a")
+    actual = read_hdf(path_obj, key="df")
+    tm.assert_frame_equal(expected, actual)
+@td.skip_if_no("py.path")
+def test_read_from_py_localpath(tmp_path, setup_path):
+    # GH11773
+    from py.path import local as LocalPath
+    expected = DataFrame(
+        np.random.default_rng(2).random((4, 5)),
+        index=list("abcd"),
+        columns=list("ABCDE"),
+    )
+    filename = tmp_path / setup_path
+    path_obj = LocalPath(filename)
+    expected.to_hdf(path_obj, key="df", mode="a")
+    actual = read_hdf(path_obj, key="df")
+    tm.assert_frame_equal(expected, actual)
+@pytest.mark.parametrize("format", ["fixed", "table"])
+def test_read_hdf_series_mode_r(tmp_path, format, setup_path):
+    # GH 16583
+    # Tests that reading a Series saved to an HDF file
+    # still works if a mode='r' argument is supplied
+    series = Series(range(10), dtype=np.float64)
+    path = tmp_path / setup_path
+    series.to_hdf(path, key="data", format=format)
+    result = read_hdf(path, key="data", mode="r")
+    tm.assert_series_equal(result, series)
+@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
+@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
+def test_read_py2_hdf_file_in_py3(datapath):
+    # GH 16781
+    # tests reading a PeriodIndex DataFrame written in Python2 in Python3
+    # the file was generated in Python 2.7 like so:
+    #
+    # df = DataFrame([1.,2,3], index=pd.PeriodIndex(
+    #              ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
+    # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')
+    expected = DataFrame(
+        [1.0, 2, 3],
+        index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"),
+    )
+    with ensure_clean_store(
+        datapath(
+            "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5"
+        ),
+        mode="r",
+    ) as store:
+        result = store["p"]
+    tm.assert_frame_equal(result, expected)
+def test_read_infer_string(tmp_path, setup_path):
+    # GH#54431
+    df = DataFrame({"a": ["a", "b", None]})
+    path = tmp_path / setup_path
+    df.to_hdf(path, key="data", format="table")
+    with pd.option_context("future.infer_string", True):
+        result = read_hdf(path, key="data", mode="r")
+    expected = DataFrame(
+        {"a": ["a", "b", None]},
+        dtype=pd.StringDtype(na_value=np.nan),
+        columns=Index(["a"], dtype=pd.StringDtype(na_value=np.nan)),
+    )
+    tm.assert_frame_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/__init__.py ADDED Viewed

File without changes

py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_constructors.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import pytest
+from pandas import (
+    Interval,
+    Period,
+    Timestamp,
+)
+class TestIntervalConstructors:
+    @pytest.mark.parametrize(
+        "left, right",
+        [
+            ("a", "z"),
+            (("a", "b"), ("c", "d")),
+            (list("AB"), list("ab")),
+            (Interval(0, 1), Interval(1, 2)),
+            (Period("2018Q1", freq="Q"), Period("2018Q1", freq="Q")),
+        ],
+    )
+    def test_construct_errors(self, left, right):
+        # GH#23013
+        msg = "Only numeric, Timestamp and Timedelta endpoints are allowed"
+        with pytest.raises(ValueError, match=msg):
+            Interval(left, right)
+    def test_constructor_errors(self):
+        msg = "invalid option for 'closed': foo"
+        with pytest.raises(ValueError, match=msg):
+            Interval(0, 1, closed="foo")
+        msg = "left side of interval must be <= right side"
+        with pytest.raises(ValueError, match=msg):
+            Interval(1, 0)
+    @pytest.mark.parametrize(
+        "tz_left, tz_right", [(None, "UTC"), ("UTC", None), ("UTC", "US/Eastern")]
+    )
+    def test_constructor_errors_tz(self, tz_left, tz_right):
+        # GH#18538
+        left = Timestamp("2017-01-01", tz=tz_left)
+        right = Timestamp("2017-01-02", tz=tz_right)
+        if tz_left is None or tz_right is None:
+            error = TypeError
+            msg = "Cannot compare tz-naive and tz-aware timestamps"
+        else:
+            error = ValueError
+            msg = "left and right must have the same time zone"
+        with pytest.raises(error, match=msg):
+            Interval(left, right)

py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_contains.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import pytest
+from pandas import (
+    Interval,
+    Timedelta,
+    Timestamp,
+)
+class TestContains:
+    def test_contains(self):
+        interval = Interval(0, 1)
+        assert 0.5 in interval
+        assert 1 in interval
+        assert 0 not in interval
+        interval_both = Interval(0, 1, "both")
+        assert 0 in interval_both
+        assert 1 in interval_both
+        interval_neither = Interval(0, 1, closed="neither")
+        assert 0 not in interval_neither
+        assert 0.5 in interval_neither
+        assert 1 not in interval_neither
+    def test_contains_interval(self, inclusive_endpoints_fixture):
+        interval1 = Interval(0, 1, "both")
+        interval2 = Interval(0, 1, inclusive_endpoints_fixture)
+        assert interval1 in interval1
+        assert interval2 in interval2
+        assert interval2 in interval1
+        assert interval1 not in interval2 or inclusive_endpoints_fixture == "both"
+    def test_contains_infinite_length(self):
+        interval1 = Interval(0, 1, "both")
+        interval2 = Interval(float("-inf"), float("inf"), "neither")
+        assert interval1 in interval2
+        assert interval2 not in interval1
+    def test_contains_zero_length(self):
+        interval1 = Interval(0, 1, "both")
+        interval2 = Interval(-1, -1, "both")
+        interval3 = Interval(0.5, 0.5, "both")
+        assert interval2 not in interval1
+        assert interval3 in interval1
+        assert interval2 not in interval3 and interval3 not in interval2
+        assert interval1 not in interval2 and interval1 not in interval3
+    @pytest.mark.parametrize(
+        "type1",
+        [
+            (0, 1),
+            (Timestamp(2000, 1, 1, 0), Timestamp(2000, 1, 1, 1)),
+            (Timedelta("0h"), Timedelta("1h")),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "type2",
+        [
+            (0, 1),
+            (Timestamp(2000, 1, 1, 0), Timestamp(2000, 1, 1, 1)),
+            (Timedelta("0h"), Timedelta("1h")),
+        ],
+    )
+    def test_contains_mixed_types(self, type1, type2):
+        interval1 = Interval(*type1)
+        interval2 = Interval(*type2)
+        if type1 == type2:
+            assert interval1 in interval2
+        else:
+            msg = "^'<=' not supported between instances of"
+            with pytest.raises(TypeError, match=msg):
+                interval1 in interval2

py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_interval.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import numpy as np
+import pytest
+from pandas import (
+    Interval,
+    Timedelta,
+    Timestamp,
+)
+@pytest.fixture
+def interval():
+    return Interval(0, 1)
+class TestInterval:
+    def test_properties(self, interval):
+        assert interval.closed == "right"
+        assert interval.left == 0
+        assert interval.right == 1
+        assert interval.mid == 0.5
+    def test_hash(self, interval):
+        # should not raise
+        hash(interval)
+    @pytest.mark.parametrize(
+        "left, right, expected",
+        [
+            (0, 5, 5),
+            (-2, 5.5, 7.5),
+            (10, 10, 0),
+            (10, np.inf, np.inf),
+            (-np.inf, -5, np.inf),
+            (-np.inf, np.inf, np.inf),
+            (Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")),
+            (Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")),
+            (Timedelta("1h10min"), Timedelta("5h5min"), Timedelta("3h55min")),
+            (Timedelta("5s"), Timedelta("1h"), Timedelta("59min55s")),
+        ],
+    )
+    def test_length(self, left, right, expected):
+        # GH 18789
+        iv = Interval(left, right)
+        result = iv.length
+        assert result == expected
+    @pytest.mark.parametrize(
+        "left, right, expected",
+        [
+            ("2017-01-01", "2017-01-06", "5 days"),
+            ("2017-01-01", "2017-01-01 12:00:00", "12 hours"),
+            ("2017-01-01 12:00", "2017-01-01 12:00:00", "0 days"),
+            ("2017-01-01 12:01", "2017-01-05 17:31:00", "4 days 5 hours 30 min"),
+        ],
+    )
+    @pytest.mark.parametrize("tz", (None, "UTC", "CET", "US/Eastern"))
+    def test_length_timestamp(self, tz, left, right, expected):
+        # GH 18789
+        iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz))
+        result = iv.length
+        expected = Timedelta(expected)
+        assert result == expected
+    @pytest.mark.parametrize(
+        "left, right",
+        [
+            (0, 1),
+            (Timedelta("0 days"), Timedelta("1 day")),
+            (Timestamp("2018-01-01"), Timestamp("2018-01-02")),
+            (
+                Timestamp("2018-01-01", tz="US/Eastern"),
+                Timestamp("2018-01-02", tz="US/Eastern"),
+            ),
+        ],
+    )
+    def test_is_empty(self, left, right, closed):
+        # GH27219
+        # non-empty always return False
+        iv = Interval(left, right, closed)
+        assert iv.is_empty is False
+        # same endpoint is empty except when closed='both' (contains one point)
+        iv = Interval(left, left, closed)
+        result = iv.is_empty
+        expected = closed != "both"
+        assert result is expected

py311/lib/python3.11/site-packages/pandas/tests/scalar/interval/test_overlaps.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import pytest
+from pandas import (
+    Interval,
+    Timedelta,
+    Timestamp,
+)
+@pytest.fixture(
+    params=[
+        (Timedelta("0 days"), Timedelta("1 day")),
+        (Timestamp("2018-01-01"), Timedelta("1 day")),
+        (0, 1),
+    ],
+    ids=lambda x: type(x[0]).__name__,
+)
+def start_shift(request):
+    """
+    Fixture for generating intervals of types from a start value and a shift
+    value that can be added to start to generate an endpoint
+    """
+    return request.param
+class TestOverlaps:
+    def test_overlaps_self(self, start_shift, closed):
+        start, shift = start_shift
+        interval = Interval(start, start + shift, closed)
+        assert interval.overlaps(interval)
+    def test_overlaps_nested(self, start_shift, closed, other_closed):
+        start, shift = start_shift
+        interval1 = Interval(start, start + 3 * shift, other_closed)
+        interval2 = Interval(start + shift, start + 2 * shift, closed)
+        # nested intervals should always overlap
+        assert interval1.overlaps(interval2)
+    def test_overlaps_disjoint(self, start_shift, closed, other_closed):
+        start, shift = start_shift
+        interval1 = Interval(start, start + shift, other_closed)
+        interval2 = Interval(start + 2 * shift, start + 3 * shift, closed)
+        # disjoint intervals should never overlap
+        assert not interval1.overlaps(interval2)
+    def test_overlaps_endpoint(self, start_shift, closed, other_closed):
+        start, shift = start_shift
+        interval1 = Interval(start, start + shift, other_closed)
+        interval2 = Interval(start + shift, start + 2 * shift, closed)
+        # overlap if shared endpoint is closed for both (overlap at a point)
+        result = interval1.overlaps(interval2)
+        expected = interval1.closed_right and interval2.closed_left
+        assert result == expected
+    @pytest.mark.parametrize(
+        "other",
+        [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")],
+        ids=lambda x: type(x).__name__,
+    )
+    def test_overlaps_invalid_type(self, other):
+        interval = Interval(0, 1)
+        msg = f"`other` must be an Interval, got {type(other).__name__}"
+        with pytest.raises(TypeError, match=msg):
+            interval.overlaps(other)

py311/lib/python3.11/site-packages/pandas/tests/scalar/timestamp/test_formats.py ADDED Viewed

	@@ -0,0 +1,201 @@

+from datetime import datetime
+import pprint
+import dateutil.tz
+import pytest
+import pytz  # a test below uses pytz but only inside a `eval` call
+from pandas import Timestamp
+# Module-level Timestamp inputs referenced by the parametrized test_isoformat
+# cases. All share the same date/time fields unless noted.
+# Microsecond precision only: no nanosecond argument is passed.
+ts_no_ns = Timestamp(
+    year=2019,
+    month=5,
+    day=18,
+    hour=15,
+    minute=17,
+    second=8,
+    microsecond=132263,
+)
+# Same fields as ts_no_ns, but with year=1.
+ts_no_ns_year1 = Timestamp(
+    year=1,
+    month=5,
+    day=18,
+    hour=15,
+    minute=17,
+    second=8,
+    microsecond=132263,
+)
+# Microseconds plus a nanosecond=123 component; no tz argument.
+ts_ns = Timestamp(
+    year=2019,
+    month=5,
+    day=18,
+    hour=15,
+    minute=17,
+    second=8,
+    microsecond=132263,
+    nanosecond=123,
+)
+# Same fields as ts_ns, additionally constructed with tz="UTC".
+ts_ns_tz = Timestamp(
+    year=2019,
+    month=5,
+    day=18,
+    hour=15,
+    minute=17,
+    second=8,
+    microsecond=132263,
+    nanosecond=123,
+    tz="UTC",
+)
+# microsecond=0 combined with nanosecond=123.
+ts_no_us = Timestamp(
+    year=2019,
+    month=5,
+    day=18,
+    hour=15,
+    minute=17,
+    second=8,
+    microsecond=0,
+    nanosecond=123,
+)
+@pytest.mark.parametrize(
+    "ts, timespec, expected_iso",
+    [
+        (ts_no_ns, "auto", "2019-05-18T15:17:08.132263"),
+        (ts_no_ns, "seconds", "2019-05-18T15:17:08"),
+        (ts_no_ns, "nanoseconds", "2019-05-18T15:17:08.132263000"),
+        (ts_no_ns_year1, "seconds", "0001-05-18T15:17:08"),
+        (ts_no_ns_year1, "nanoseconds", "0001-05-18T15:17:08.132263000"),
+        (ts_ns, "auto", "2019-05-18T15:17:08.132263123"),
+        (ts_ns, "hours", "2019-05-18T15"),
+        (ts_ns, "minutes", "2019-05-18T15:17"),
+        (ts_ns, "seconds", "2019-05-18T15:17:08"),
+        (ts_ns, "milliseconds", "2019-05-18T15:17:08.132"),
+        (ts_ns, "microseconds", "2019-05-18T15:17:08.132263"),
+        (ts_ns, "nanoseconds", "2019-05-18T15:17:08.132263123"),
+        (ts_ns_tz, "auto", "2019-05-18T15:17:08.132263123+00:00"),
+        (ts_ns_tz, "hours", "2019-05-18T15+00:00"),
+        (ts_ns_tz, "minutes", "2019-05-18T15:17+00:00"),
+        (ts_ns_tz, "seconds", "2019-05-18T15:17:08+00:00"),
+        (ts_ns_tz, "milliseconds", "2019-05-18T15:17:08.132+00:00"),
+        (ts_ns_tz, "microseconds", "2019-05-18T15:17:08.132263+00:00"),
+        (ts_ns_tz, "nanoseconds", "2019-05-18T15:17:08.132263123+00:00"),
+        (ts_no_us, "auto", "2019-05-18T15:17:08.000000123"),
+    ],
+)
+def test_isoformat(ts, timespec, expected_iso):
+    assert ts.isoformat(timespec=timespec) == expected_iso
+# Tests for the textual forms of Timestamp: repr(), str() and pprint output.
+# Several tests call eval(repr(...)), which resolves names such as Timestamp
+# (and pytz, per the import comment at the top of the module) from module scope.
+class TestTimestampRendering:
+    # Time zone names used to parametrize test_repr.
+    timezones = ["UTC", "Asia/Tokyo", "US/Eastern", "dateutil/America/Los_Angeles"]
+    @pytest.mark.parametrize("tz", timezones)
+    @pytest.mark.parametrize("freq", ["D", "M", "S", "N"])
+    @pytest.mark.parametrize(
+        "date", ["2014-03-07", "2014-01-01 09:00", "2014-01-01 00:00:00.000000001"]
+    )
+    def test_repr(self, date, freq, tz):
+        # Checks which pieces of information appear in repr() for a naive and
+        # a tz-aware Timestamp, and that eval(repr(x)) compares equal to x.
+        # avoid to match with timezone name
+        # (the freq letter is quoted so it cannot match letters inside a tz name)
+        freq_repr = f"'{freq}'"
+        if tz.startswith("dateutil"):
+            # drop the "dateutil" prefix, leaving e.g. "/America/Los_Angeles"
+            tz_repr = tz.replace("dateutil", "")
+        else:
+            tz_repr = tz
+        # naive Timestamp: date text present, no tz name, no quoted freq
+        date_only = Timestamp(date)
+        assert date in repr(date_only)
+        assert tz_repr not in repr(date_only)
+        assert freq_repr not in repr(date_only)
+        assert date_only == eval(repr(date_only))
+        # tz-aware Timestamp: date text and tz name present, no quoted freq
+        date_tz = Timestamp(date, tz=tz)
+        assert date in repr(date_tz)
+        assert tz_repr in repr(date_tz)
+        assert freq_repr not in repr(date_tz)
+        assert date_tz == eval(repr(date_tz))
+    def test_repr_utcoffset(self):
+        # A string carrying a UTC offset, constructed with tz=None.
+        # This can cause the tz field to be populated, but it's redundant to
+        # include this information in the date-string.
+        date_with_utc_offset = Timestamp("2014-03-13 00:00:00-0400", tz=None)
+        assert "2014-03-13 00:00:00-0400" in repr(date_with_utc_offset)
+        assert "tzoffset" not in repr(date_with_utc_offset)
+        assert "UTC-04:00" in repr(date_with_utc_offset)
+        # the repr must also evaluate back to an equal Timestamp
+        expr = repr(date_with_utc_offset)
+        assert date_with_utc_offset == eval(expr)
+    def test_timestamp_repr_pre1900(self):
+        # pre-1900
+        stamp = Timestamp("1850-01-01", tz="US/Eastern")
+        # only checks that repr() does not raise for this date
+        repr(stamp)
+        # with a sub-second component, the input text must appear in the repr
+        iso8601 = "1850-01-01 01:23:45.012345"
+        stamp = Timestamp(iso8601, tz="US/Eastern")
+        result = repr(stamp)
+        assert iso8601 in result
+    def test_pprint(self):
+        # GH#12622
+        # pformat of a nested dict/list holding Timestamps must match the
+        # expected text exactly, including line breaks and indentation.
+        nested_obj = {"foo": 1, "bar": [{"w": {"a": Timestamp("2011-01-01")}}] * 10}
+        result = pprint.pformat(nested_obj, width=50)
+        expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}},
+         {'w': {'a': Timestamp('2011-01-01 00:00:00')}}],
+ 'foo': 1}"""
+        assert result == expected
+    def test_to_timestamp_repr_is_code(self):
+        # repr() of each Timestamp must be evaluable code yielding an equal value
+        zs = [
+            Timestamp("99-04-17 00:00:00", tz="UTC"),
+            Timestamp("2001-04-17 00:00:00", tz="UTC"),
+            Timestamp("2001-04-17 00:00:00", tz="America/Los_Angeles"),
+            Timestamp("2001-04-17 00:00:00", tz=None),
+        ]
+        for z in zs:
+            assert eval(repr(z)) == z
+    def test_repr_matches_pydatetime_no_tz(self):
+        # str() of a naive Timestamp must equal str() of the source datetime
+        dt_date = datetime(2013, 1, 2)
+        assert str(dt_date) == str(Timestamp(dt_date))
+        dt_datetime = datetime(2013, 1, 2, 12, 1, 3)
+        assert str(dt_datetime) == str(Timestamp(dt_datetime))
+        dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45)
+        assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us))
+        # integer inputs: the expected strings show nine fractional digits
+        ts_nanos_only = Timestamp(200)
+        assert str(ts_nanos_only) == "1970-01-01 00:00:00.000000200"
+        ts_nanos_micros = Timestamp(1200)
+        assert str(ts_nanos_micros) == "1970-01-01 00:00:00.000001200"
+    def test_repr_matches_pydatetime_tz_pytz(self):
+        # same str() comparison, with tzinfo=pytz.utc on the datetime
+        dt_date = datetime(2013, 1, 2, tzinfo=pytz.utc)
+        assert str(dt_date) == str(Timestamp(dt_date))
+        dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=pytz.utc)
+        assert str(dt_datetime) == str(Timestamp(dt_datetime))
+        dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=pytz.utc)
+        assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us))
+    def test_repr_matches_pydatetime_tz_dateutil(self):
+        # same str() comparison, with dateutil's tzutc() as tzinfo
+        utc = dateutil.tz.tzutc()
+        dt_date = datetime(2013, 1, 2, tzinfo=utc)
+        assert str(dt_date) == str(Timestamp(dt_date))
+        dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=utc)
+        assert str(dt_datetime) == str(Timestamp(dt_datetime))
+        dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=utc)
+        assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us))

py311/lib/python3.11/site-packages/pandas/tests/scalar/timestamp/test_timezones.py ADDED Viewed

	@@ -0,0 +1,24 @@

+"""
+Tests for Timestamp timezone-related methods
+"""
+from datetime import datetime
+from pandas._libs.tslibs import timezones
+from pandas import Timestamp
+class TestTimestampTZOperations:
+    # ------------------------------------------------------------------
+    def test_timestamp_timetz_equivalent_with_datetime_tz(self, tz_naive_fixture):
+        # GH21358
+        tz = timezones.maybe_get_tz(tz_naive_fixture)
+        stamp = Timestamp("2018-06-04 10:20:30", tz=tz)
+        _datetime = datetime(2018, 6, 4, hour=10, minute=20, second=30, tzinfo=tz)
+        result = stamp.timetz()
+        expected = _datetime.timetz()
+        assert result == expected