|
|
import numpy as np |
|
|
import pytest |
|
|
|
|
|
import h5py |
|
|
|
|
|
# True when the major component of the installed NumPy version is 2 or higher.
NUMPY_GE2 = int(np.__version__.partition(".")[0]) >= 2
|
|
# Module-level pytest marker: skip every test in this file unless NUMPY_GE2
# is true.
pytestmark = pytest.mark.skipif(
    not NUMPY_GE2,
    reason="requires numpy >=2.0",
)
|
|
|
|
|
|
|
|
def test_create_with_dtype_T(writable_file):
    """Create a dataset with ``dtype="T"`` and exercise writes and reads on it.

    Checks that:

    - a nested list of ``str`` assigned to the whole dataset reads back
      unchanged through ``asstr()``;
    - the dataset fetched again by name reports ``dtype == object``;
    - a single element can be overwritten with a plain ``str`` or with a
      0-d object array, and ``astype("T")`` yields an array of kind ``"T"``.
    """
    expected = [["foo", "bar"], ["hello world", ""]]

    dset = writable_file.create_dataset("x", shape=(2, 2), dtype="T")
    dset[:] = expected
    as_str = dset.asstr()[:]
    np.testing.assert_array_equal(as_str, expected)

    # Look the dataset up again by name before the remaining checks.
    dset = writable_file["x"]
    assert dset.dtype == object
    np.testing.assert_array_equal(dset.asstr()[:], expected)

    # Overwrite one element with a plain Python str.
    dset[0, 0] = "baz"
    expected[0][0] = "baz"
    as_npystrings = dset.astype("T")[:]
    assert as_npystrings.dtype.kind == "T"
    np.testing.assert_array_equal(as_npystrings, expected)

    # Overwrite the same element with a 0-d object array.
    dset[0, 0] = np.asarray("123", dtype="O")
    expected[0][0] = "123"
    np.testing.assert_array_equal(dset.asstr()[:], expected)
|
|
|
|
|
|
|
|
def test_fromdata(writable_file):
    """Create datasets from existing string data and read them back.

    Three creation paths are exercised: a list with ``dtype="T"``, a list
    with a ``np.dtypes.StringDType()`` instance, and an array that already
    has dtype ``"T"`` with no explicit dtype argument. Each resulting dataset
    is expected to report an object dtype and to convert to kind ``"T"``
    through ``astype("T")``.
    """
    text = [["foo", "bar"]]
    as_npystrings = np.asarray(text, dtype="T")

    created = [
        writable_file.create_dataset("x", data=text, dtype="T"),
        writable_file.create_dataset(
            "y", data=text, dtype=np.dtypes.StringDType()
        ),
        writable_file.create_dataset("z", data=as_npystrings),
    ]

    # First pass: use the dataset objects returned by create_dataset.
    for dset in created:
        assert dset.dtype.kind == "O"
        np.testing.assert_array_equal(dset.astype("T")[:], as_npystrings)

    # Second pass: fetch each dataset again by name.
    for name in ["x", "y", "z"]:
        dset = writable_file[name]
        assert dset.dtype == object
        np.testing.assert_array_equal(dset.asstr()[:], text)

        # Both the astype() wrapper and the array it returns report kind "T".
        view = dset.astype("T")
        assert view.dtype.kind == "T"
        values = view[:]
        assert values.dtype.kind == "T"
        np.testing.assert_array_equal(values, text)
|
|
|
|
|
|
|
|
def test_fixed_to_variable_width(writable_file):
    """Read and write a fixed-width string dataset using dtype ``"T"``.

    The dataset is created with ``h5py.string_dtype(length=20)`` and is
    expected to report dtype ``"S20"``. It is then read through an
    ``astype("T")`` wrapper, and a 0-d ``"T"`` array is written into it.
    """
    expected = ["foo", "longer than 8 bytes"]
    fixed = writable_file.create_dataset(
        "x",
        data=expected,
        dtype=h5py.string_dtype(length=20),
    )
    assert fixed.dtype == "S20"

    # Reading through astype("T") yields arrays of kind "T".
    variable = fixed.astype("T")
    assert variable.dtype.kind == "T"
    assert variable[:].dtype.kind == "T"
    np.testing.assert_array_equal(variable[:], expected)

    # Write through the original dataset; the wrapper created earlier is
    # expected to return the new value.
    fixed[0] = np.asarray("1234", dtype="T")
    expected[0] = "1234"
    np.testing.assert_array_equal(variable[:], expected)
|
|
|
|
|
|
|
|
def test_fixed_to_variable_width_too_short(writable_file):
    """Write a ``"T"`` string that is longer than the dataset's fixed width.

    The dataset has dtype ``"S3"``. Writing ``"1234"`` is expected to store
    only the first three characters (``b"123"``) and leave the other element
    unchanged.
    """
    fixed = writable_file.create_dataset(
        "x",
        data=["foo", "bar"],
        dtype=h5py.string_dtype(length=3),
    )
    assert fixed.dtype == "S3"

    # Four characters are written into a three-byte slot.
    fixed[0] = np.asarray("1234", dtype="T")
    np.testing.assert_array_equal(fixed[:], [b"123", b"bar"])
|
|
|
|
|
|
|
|
def test_variable_to_fixed_width(writable_file):
    """Read and write a ``dtype="T"`` dataset using fixed-width bytes dtypes.

    Checks ``astype("S20")`` (wide enough for every value), ``astype("S3")``
    (where the longer value is expected to come back as ``b"lon"``), and
    writing a 0-d ``"S5"`` array into the dataset.
    """
    text = ["foo", "longer than 8 bytes"]
    expected_bytes = [b"foo", b"longer than 8 bytes"]
    variable = writable_file.create_dataset("x", data=text, dtype="T")

    # Wide enough for every value: contents come back complete.
    wide = variable.astype("S20")
    assert wide.dtype == "S20"
    assert wide[:].dtype == "S20"
    np.testing.assert_array_equal(wide[:], expected_bytes)

    # Too narrow for the second value: only its first three bytes are expected.
    narrow = variable.astype("S3")
    assert narrow.dtype == "S3"
    assert narrow[:].dtype == "S3"
    np.testing.assert_array_equal(narrow[:], [b"foo", b"lon"])

    # Write a fixed-width bytes value into the dataset.
    variable[0] = np.asarray(b"1234", dtype="S5")
    expected_bytes[0] = b"1234"
    np.testing.assert_array_equal(variable[:], expected_bytes)
|
|
|
|
|
|
|
|
def test_write_object_into_npystrings(writable_file):
    """Assign a 0-d object array to an element of a ``dtype="T"`` dataset.

    A plain read of the dataset afterwards is expected to compare equal to
    ``b"1234"``.
    """
    dset = writable_file.create_dataset("x", data=["foo"], dtype="T")
    replacement = np.asarray("1234", dtype="O")
    dset[0] = replacement
    np.testing.assert_array_equal(dset[:], b"1234")
|
|
|
|
|
|
|
|
def test_write_npystrings_into_object(writable_file):
    """Assign 0-d ``"T"`` arrays to datasets created with ``h5py.string_dtype``.

    Two datasets are covered: one using ``h5py.string_dtype()`` with its
    default arguments, and one using ``h5py.string_dtype("ascii")``.
    """
    default_ds = writable_file.create_dataset(
        "x",
        data=["foo"],
        dtype=h5py.string_dtype(),
    )
    assert default_ds.dtype == object
    default_ds[0] = np.asarray("1234", dtype="T")
    np.testing.assert_array_equal(default_ds[:], b"1234")

    # Same write against a dataset created with the "ascii" string dtype.
    ascii_ds = writable_file.create_dataset(
        "xa",
        shape=(1,),
        dtype=h5py.string_dtype("ascii"),
    )
    ascii_ds[0] = np.asarray("2345", dtype="T")
    np.testing.assert_array_equal(ascii_ds[:], b"2345")
|
|
|
|
|
|
|
|
def test_fillvalue(writable_file):
    """Check fill values on string datasets that have never been written to.

    With ``dtype="T"`` and a ``str`` fill value, and with
    ``h5py.string_dtype()`` and a ``bytes`` fill value, ``fillvalue`` is
    expected to be the ``bytes`` object ``b"foo"`` and an unwritten element
    is expected to read as ``b"foo"``. Through ``astype("T")`` the unwritten
    element is expected to compare equal to ``"foo"``.
    """
    # dtype "T" with a str fill value.
    from_str = writable_file.create_dataset(
        "x", shape=(2,), dtype="T", fillvalue="foo"
    )
    assert isinstance(from_str.fillvalue, bytes)
    assert from_str.fillvalue == b"foo"
    assert from_str[0] == b"foo"

    # h5py string dtype with a bytes fill value.
    from_bytes = writable_file.create_dataset(
        "y",
        shape=(2,),
        dtype=h5py.string_dtype(),
        fillvalue=b"foo",
    )
    assert isinstance(from_bytes.fillvalue, bytes)
    assert from_bytes.fillvalue == b"foo"
    assert from_bytes[0] == b"foo"

    # Reading the same unwritten element through astype("T").
    as_npystrings = from_bytes.astype("T")
    assert as_npystrings[0] == "foo"
|
|
|
|
|
|
|
|
def test_empty_string(writable_file):
    """Store and overwrite empty strings in a dataset built from a ``"T"`` array.

    The empty string is checked both at creation time and after a slice
    assignment, with plain reads (bytes) and with ``astype("T")`` reads.
    """
    strings = np.array(["", "a", "b"], dtype="T")
    dset = writable_file.create_dataset("x", data=strings)

    np.testing.assert_array_equal(dset[:], [b"", b"a", b"b"])
    np.testing.assert_array_equal(dset.astype("T")[:], strings)

    # Swap which of the first two entries is empty, then write that slice.
    strings[:2] = ["c", ""]
    dset[:2] = strings[:2]

    np.testing.assert_array_equal(dset[:], [b"c", b"", b"b"])
    np.testing.assert_array_equal(dset.astype("T")[:], strings)
|
|
|
|
|
|
|
|
def test_astype_nonstring(writable_file):
    """Expect ``astype("T")`` to raise on a dataset created with dtype ``"i8"``.

    The error must be a ``TypeError`` whose message matches
    ``"HDF5 string datatype"``.
    """
    numeric = writable_file.create_dataset("x", shape=(2,), dtype="i8")
    with pytest.raises(TypeError, match="HDF5 string datatype"):
        numeric.astype("T")
|
|
|