File size: 1,148 Bytes
31e2456
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""E0 peek: discover the schema of lucky9-cyou/mimic-iv-aligned-ppg-ecg before full audit."""
import os
from dotenv import load_dotenv

# Load variables via python-dotenv (by default from a local .env file) into
# os.environ. Presumably this must run before the `datasets` import further
# down so the token is already in place -- TODO confirm.
load_dotenv()

# Use HUGGINGFACE_API_KEY as a fallback for HF_TOKEN. Only copy it when it is
# non-empty and HF_TOKEN is missing or blank, so an empty HF_TOKEN is never
# exported and a blank one (e.g. `HF_TOKEN=` in .env) is repaired.
_fallback_token = os.environ.get("HUGGINGFACE_API_KEY", "")
if _fallback_token and not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = _fallback_token

from datasets import load_dataset, get_dataset_config_names, get_dataset_split_names

DS_NAME = "lucky9-cyou/mimic-iv-aligned-ppg-ecg"

# Run each metadata probe in turn. Both are best-effort: a failure is printed
# and the script carries on to the next step instead of aborting.
for _header, _probe in (
    ("=== configs ===", get_dataset_config_names),
    ("=== splits ===", get_dataset_split_names),
):
    print(_header)
    try:
        _names = _probe(DS_NAME)
        print(_names)
    except Exception as exc:
        print("err:", exc)

print("=== stream first sample ===")

# Import numpy once, outside the per-field loop. If it is not installed, `np`
# is None and the np.asarray call below raises, which selects the len/type
# fallback -- the same outcome as the original in-loop import.
try:
    import numpy as np
except ImportError:
    np = None

# Open the "train" split in streaming mode and show its declared features.
ds = load_dataset(DS_NAME, split="train", streaming=True)
print("features:", ds.features)

it = iter(ds)
# Pass a default so an empty split ends with a readable message (and a
# non-zero exit status) instead of a bare StopIteration traceback.
s = next(it, None)
if s is None:
    raise SystemExit(f"err: split 'train' of {DS_NAME} yielded no samples")

print("keys:", list(s.keys()))
for k, v in s.items():
    if hasattr(v, "__len__") and not isinstance(v, str):
        # Sized, non-string value: summarise it as an array when possible.
        try:
            arr = np.asarray(v)
        except Exception:
            # Could not be converted to an array; report length and type name.
            print(f"  {k}: len={len(v)} type={type(v).__name__}")
        else:
            print(f"  {k}: shape={arr.shape} dtype={arr.dtype}")
    else:
        # Strings and unsized values: print the repr, capping the whole
        # output line at 200 characters.
        print(f"  {k}: {v!r}"[:200])