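"""Service layer for pulling up to N rows out of a Hugging Face (HF) dataset,
for real (not just a peek). Streams where the dataset supports it so we
avoid downloading the whole dataset when only a few rows were asked for;
datasets that cannot be streamed fall back to a full load.
"""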
from __future__ import annotations

import logging
from itertools import islice
from typing import Optional

from datasets import load_dataset


def load_limited(
    repo_id: str,
    subset: str,
    split: str,
    limit: int,
    token: Optional[str] = None,
) -> list:
    """Return up to ``limit`` rows of a dataset split as plain dicts.

    Streaming is tried first so that only the requested rows are fetched.
    If the streaming attempt fails for any reason, the split is loaded in
    full and the first ``limit`` rows are taken from it.

    Args:
        repo_id: Dataset identifier passed to ``load_dataset``.
        subset: Configuration name; an empty string is passed on as ``None``.
        split: Split name passed to ``load_dataset``.
        limit: Maximum number of rows to return. Zero or a negative value
            yields an empty list without loading anything.
        token: Optional access token forwarded to ``load_dataset``.

    Returns:
        A list of at most ``limit`` rows, each converted with ``dict()``.

    Raises:
        Exception: Whatever the non-streaming ``load_dataset`` call (or
            reading its rows) raises when the fallback fails as well.
    """
    # Nothing was asked for: skip the network round trip entirely.
    if limit <= 0:
        return []

    config = subset or None
    try:
        stream = load_dataset(repo_id, config, split=split, streaming=True, token=token)
        # islice stops after exactly `limit` rows; an enumerate/break loop
        # would pull one extra row from the stream before noticing the cap.
        return [dict(row) for row in islice(stream, limit)]
    except Exception:
        # Some datasets (mostly older script-based ones) don't support
        # streaming. Fall back to a full load and slice - slower, but
        # correct, and rare enough not to special-case earlier. The failure
        # is logged so an unexpected full download is never silent.
        logging.getLogger(__name__).warning(
            "Streaming %s (split=%s) failed; falling back to a full load",
            repo_id,
            split,
            exc_info=True,
        )
        full = load_dataset(repo_id, config, split=split, token=token)
        return [dict(row) for row in islice(full, limit)]