"""E0 peek: discover the schema of lucky9-cyou/mimic-iv-aligned-ppg-ecg before full audit.

Prints the dataset's config names and split names, then streams one sample
and prints a one-line summary of every field: array shape/dtype when the
value converts to a NumPy array, otherwise its length/type or a truncated
repr.  Intended to be run as a script.
"""
import os

from dotenv import load_dotenv

# Load .env first so a HUGGINGFACE_API_KEY defined there is visible below.
load_dotenv()

# Mirror HUGGINGFACE_API_KEY into HF_TOKEN without overriding an existing
# HF_TOKEN.  Only do so when the fallback is non-empty: exporting
# HF_TOKEN="" would make the variable look "set" to anything that merely
# checks for its presence.
_fallback_token = os.environ.get("HUGGINGFACE_API_KEY", "")
if _fallback_token:
    os.environ.setdefault("HF_TOKEN", _fallback_token)

# Deliberately imported after the environment is prepared, preserving the
# original statement order.
# NOTE(review): presumably so the library sees HF_TOKEN - confirm whether
# the installed version reads it at import time or lazily.
import numpy as np
from datasets import get_dataset_config_names, get_dataset_split_names, load_dataset

DS_NAME = "lucky9-cyou/mimic-iv-aligned-ppg-ecg"


def _describe_field(name, value):
    """Return the one-line summary printed for a single sample field.

    Sized, non-string values are summarised by the shape and dtype of their
    NumPy conversion, or by length and type name if the conversion fails.
    Everything else is shown as its repr, with the whole line cut to 200
    characters.
    """
    if hasattr(value, "__len__") and not isinstance(value, str):
        try:
            arr = np.asarray(value)
        except Exception:
            # Best effort: whatever makes the conversion fail, still report
            # something useful instead of aborting the peek.
            return f" {name}: len={len(value)} type={type(value).__name__}"
        return f" {name}: shape={arr.shape} dtype={arr.dtype}"
    return f" {name}: {value!r}"[:200]


print("=== configs ===")
try:
    print(get_dataset_config_names(DS_NAME))
except Exception as e:
    # Discovery is informational only; report the failure and keep going.
    print("err:", e)

print("=== splits ===")
splits = None
try:
    splits = get_dataset_split_names(DS_NAME)
except Exception as e:
    print("err:", e)
else:
    print(splits)

print("=== stream first sample ===")
# Keep "train" as the default, but if the discovered splits do not include
# it, fall back to the first one rather than failing on a missing split.
split = "train" if not splits or "train" in splits else splits[0]
ds = load_dataset(DS_NAME, split=split, streaming=True)
print("features:", ds.features)

# An empty stream would otherwise surface as a bare StopIteration traceback.
sample = next(iter(ds), None)
if sample is None:
    raise SystemExit(f"split {split!r} of {DS_NAME} yielded no samples")

print("keys:", list(sample.keys()))
for k, v in sample.items():
    print(_describe_field(k, v))