joebruce1313's picture
Upload 38004 files
1f5470c verified
import numpy as np
import pytest
import h5py
NUMPY_GE2 = int(np.__version__.split(".")[0]) >= 2
pytestmark = pytest.mark.skipif(not NUMPY_GE2, reason="requires numpy >=2.0")
def test_create_with_dtype_T(writable_file):
ds = writable_file.create_dataset("x", shape=(2, 2), dtype="T")
data = [["foo", "bar"], ["hello world", ""]]
ds[:] = data
a = ds.asstr()[:]
np.testing.assert_array_equal(a, data)
ds = writable_file["x"]
assert ds.dtype == object
np.testing.assert_array_equal(ds.asstr()[:], data)
ds[0, 0] = "baz"
data[0][0] = "baz"
a = ds.astype("T")[:]
assert a.dtype.kind == "T"
np.testing.assert_array_equal(a, data)
ds[0, 0] = np.asarray("123", dtype="O")
data[0][0] = "123"
np.testing.assert_array_equal(ds.asstr()[:], data)
def test_fromdata(writable_file):
data = [["foo", "bar"]]
np_data = np.asarray(data, dtype="T")
x = writable_file.create_dataset("x", data=data, dtype="T")
y = writable_file.create_dataset("y", data=data, dtype=np.dtypes.StringDType())
z = writable_file.create_dataset("z", data=np_data)
for ds in (x, y, z):
assert ds.dtype.kind == "O"
np.testing.assert_array_equal(ds.astype("T")[:], np_data)
for name in ("x", "y", "z"):
ds = writable_file[name]
assert ds.dtype == object
np.testing.assert_array_equal(ds.asstr()[:], data)
ds = ds.astype("T")
assert ds.dtype.kind == "T"
a = ds[:]
assert a.dtype.kind == "T"
np.testing.assert_array_equal(a, data)
def test_fixed_to_variable_width(writable_file):
data = ["foo", "longer than 8 bytes"]
x = writable_file.create_dataset(
"x", data=data, dtype=h5py.string_dtype(length=20)
)
assert x.dtype == "S20"
# read T <- S
y = x.astype("T")
assert y.dtype.kind == "T"
assert y[:].dtype.kind == "T"
np.testing.assert_array_equal(y[:], data)
# write T -> S
x[0] = np.asarray("1234", dtype="T")
data[0] = "1234"
np.testing.assert_array_equal(y[:], data)
def test_fixed_to_variable_width_too_short(writable_file):
# Note: this test triggers calls to H5Tconvert which are otherwise skipped.
data = ["foo", "bar"]
x = writable_file.create_dataset(
"x", data=data, dtype=h5py.string_dtype(length=3)
)
assert x.dtype == "S3"
# write T -> S
x[0] = np.asarray("1234", dtype="T")
np.testing.assert_array_equal(x[:], [b"123", b"bar"])
def test_variable_to_fixed_width(writable_file):
data = ["foo", "longer than 8 bytes"]
bdata = [b"foo", b"longer than 8 bytes"]
x = writable_file.create_dataset("x", data=data, dtype="T")
# read S <- T
y = x.astype("S20")
assert y.dtype == "S20"
assert y[:].dtype == "S20"
np.testing.assert_array_equal(y[:], bdata)
y = x.astype("S3")
assert y.dtype == "S3"
assert y[:].dtype == "S3"
np.testing.assert_array_equal(y[:], [b"foo", b"lon"])
# write S -> T
x[0] = np.asarray(b"1234", dtype="S5")
bdata[0] = b"1234"
np.testing.assert_array_equal(x[:], bdata)
def test_write_object_into_npystrings(writable_file):
x = writable_file.create_dataset("x", data=["foo"], dtype="T")
x[0] = np.asarray("1234", dtype="O")
np.testing.assert_array_equal(x[:], b"1234")
def test_write_npystrings_into_object(writable_file):
x = writable_file.create_dataset(
"x", data=["foo"], dtype=h5py.string_dtype()
)
assert x.dtype == object
x[0] = np.asarray("1234", dtype="T")
np.testing.assert_array_equal(x[:], b"1234")
# Test with HDF5 variable-length strings with ASCII character set
xa = writable_file.create_dataset(
"xa", shape=(1,), dtype=h5py.string_dtype('ascii')
)
xa[0] = np.asarray("2345", dtype="T")
np.testing.assert_array_equal(xa[:], b"2345")
def test_fillvalue(writable_file):
# Create as NpyString dtype
x = writable_file.create_dataset("x", shape=(2,), dtype="T", fillvalue="foo")
assert isinstance(x.fillvalue, bytes)
assert x.fillvalue == b"foo"
assert x[0] == b"foo"
# Create as object dtype
y = writable_file.create_dataset(
"y", shape=(2,), dtype=h5py.string_dtype(), fillvalue=b"foo"
)
assert isinstance(y.fillvalue, bytes)
assert y.fillvalue == b"foo"
assert y[0] == b"foo"
# Convert object dtype to NpyString
y = y.astype("T")
assert y[0] == "foo"
def test_empty_string(writable_file):
data = np.array(["", "a", "b"], dtype="T")
x = writable_file.create_dataset("x", data=data)
np.testing.assert_array_equal(x[:], [b"", b"a", b"b"])
np.testing.assert_array_equal(x.astype("T")[:], data)
data[:2] = ["c", ""]
x[:2] = data[:2]
np.testing.assert_array_equal(x[:], [b"c", b"", b"b"])
np.testing.assert_array_equal(x.astype("T")[:], data)
def test_astype_nonstring(writable_file):
x = writable_file.create_dataset("x", shape=(2, ), dtype="i8")
with pytest.raises(TypeError, match="HDF5 string datatype"):
x.astype("T")