PhysioJEPA / scripts /e0_peek.py
guychuk's picture
Upload folder using huggingface_hub
31e2456 verified
"""E0 peek: discover the schema of lucky9-cyou/mimic-iv-aligned-ppg-ecg before full audit."""
import os
from dotenv import load_dotenv
load_dotenv()
# Mirror HUGGINGFACE_API_KEY into HF_TOKEN, but only when the fallback actually
# holds a value and HF_TOKEN is unset or empty.  Exporting an empty HF_TOKEN is
# avoided on purpose: NOTE(review) an empty token may be picked up by Hugging
# Face clients in place of a cached login on some versions -- confirm against
# the installed huggingface_hub.
_fallback_token = os.environ.get("HUGGINGFACE_API_KEY")
if _fallback_token and not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = _fallback_token
from datasets import load_dataset, get_dataset_config_names, get_dataset_split_names
# Hub repository id of the dataset whose schema is being inspected.
DS_NAME = "lucky9-cyou/mimic-iv-aligned-ppg-ecg"

# Run each metadata probe independently: a failure is reported as "err: ..."
# and the script carries on with the next probe.
for _banner, _probe in (
    ("=== configs ===", get_dataset_config_names),
    ("=== splits ===", get_dataset_split_names),
):
    print(_banner)
    try:
        print(_probe(DS_NAME))
    except Exception as exc:
        print("err:", exc)
import numpy as np  # imported once here rather than once per field inside the loop


def _describe_field(key, value):
    """Return the one-line summary printed for a single field of a sample.

    Sized, non-string values are summarised by the ``shape`` and ``dtype`` of
    ``np.asarray(value)``; if that conversion (or formatting) fails, the
    summary falls back to ``len(value)`` and the type name.  Any other value
    is shown as its ``repr``, with the whole line capped at 200 characters.
    """
    if isinstance(value, str) or not hasattr(value, "__len__"):
        return f" {key}: {value!r}"[:200]
    try:
        arr = np.asarray(value)
        return f" {key}: shape={arr.shape} dtype={arr.dtype}"
    except Exception:
        # Deliberately broad: the value types are unknown at this point, and
        # this is a best-effort peek that should always print something.
        return f" {key}: len={len(value)} type={type(value).__name__}"


print("=== stream first sample ===")
# streaming=True: iterate the split lazily and pull only the first record.
ds = load_dataset(DS_NAME, split="train", streaming=True)
print("features:", ds.features)
it = iter(ds)
# Use a default so an empty stream yields a clear message instead of a bare
# StopIteration traceback.
s = next(it, None)
if s is None:
    raise SystemExit(f"no samples streamed from {DS_NAME!r} (split='train')")
print("keys:", list(s.keys()))
for k, v in s.items():
    print(_describe_field(k, v))