"""E0 peek: discover the schema of lucky9-cyou/mimic-iv-aligned-ppg-ecg before full audit.""" import os from dotenv import load_dotenv load_dotenv() os.environ.setdefault("HF_TOKEN", os.environ.get("HUGGINGFACE_API_KEY", "")) from datasets import load_dataset, get_dataset_config_names, get_dataset_split_names DS_NAME = "lucky9-cyou/mimic-iv-aligned-ppg-ecg" print("=== configs ===") try: print(get_dataset_config_names(DS_NAME)) except Exception as e: print("err:", e) print("=== splits ===") try: print(get_dataset_split_names(DS_NAME)) except Exception as e: print("err:", e) print("=== stream first sample ===") ds = load_dataset(DS_NAME, split="train", streaming=True) print("features:", ds.features) it = iter(ds) s = next(it) print("keys:", list(s.keys())) for k, v in s.items(): if hasattr(v, "__len__") and not isinstance(v, str): try: import numpy as np arr = np.asarray(v) print(f" {k}: shape={arr.shape} dtype={arr.dtype}") except Exception: print(f" {k}: len={len(v)} type={type(v).__name__}") else: print(f" {k}: {v!r}"[:200])