| import os |
| import time |
| import requests |
| from dataclasses import dataclass |
| from typing import Dict, Any, Optional, List, Tuple |
|
|
| from huggingface_hub import HfApi |
|
|
# Base URL of the Hugging Face datasets-server REST API. Endpoint paths such as
# "/splits" and "/first-rows" are appended directly, so it must not end with a
# slash or contain any whitespace/newline.
HF_DATASETS_SERVER = "https://datasets-server.huggingface.co"
|
|
@dataclass
class HFUploadResult:
    """Result record for an upload to a Hugging Face repository.

    This is the declared return type of ``upload_csv_delta_to_dataset_repo``.
    The three required fields are plain strings; ``commit_url`` is optional
    and defaults to ``None`` when it is not supplied.
    """

    # Repository identifier -- presumably "namespace/name"; confirm with caller.
    repo_id: str
    # Destination path of the uploaded file inside the repository.
    path_in_repo: str
    # Commit message associated with the upload.
    commit_message: str
    # URL of the resulting commit, or None if it was not provided.
    commit_url: Optional[str] = None
|
|
def require_token() -> str:
    """Return the Hugging Face token from the ``HF_TOKEN`` environment variable.

    Surrounding whitespace is stripped from the value before it is returned.

    Returns:
        The non-empty, stripped token string.

    Raises:
        RuntimeError: If ``HF_TOKEN`` is unset, empty, or whitespace-only.
    """
    token = os.getenv("HF_TOKEN", "").strip()
    if not token:
        # Fail early with setup instructions instead of letting a later
        # authenticated call fail with a less helpful error.
        raise RuntimeError(
            "Missing HF_TOKEN. Set it in Space Secrets: Settings → Secrets → New secret → Name=HF_TOKEN."
        )
    return token
|
|
def get_splits(dataset: str, token: Optional[str] = None) -> Dict[str, Any]:
    """Query the datasets-server ``/splits`` endpoint for ``dataset``.

    Args:
        dataset: Dataset identifier. It is fully percent-encoded (including
            any ``/``) before being placed in the query string.
        token: Optional access token. When truthy it is sent as an
            ``Authorization: Bearer <token>`` header; otherwise the request
            is made without an Authorization header.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    headers = {}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    # safe='' makes quote() also escape "/" so a "namespace/name" id stays a
    # single query-parameter value.
    url = f"{HF_DATASETS_SERVER}/splits?dataset={requests.utils.quote(dataset, safe='')}"
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    return r.json()
|
|
def get_first_rows(dataset: str, config: str, split: str, token: Optional[str] = None) -> Dict[str, Any]:
    """Query the datasets-server ``/first-rows`` endpoint for one split.

    Args:
        dataset: Dataset identifier.
        config: Configuration name passed as the ``config`` query parameter.
        split: Split name passed as the ``split`` query parameter.
        token: Optional access token. When truthy it is sent as an
            ``Authorization: Bearer <token>`` header; otherwise the request
            is made without an Authorization header.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    headers = {}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    # Each value is fully percent-encoded (safe='' also escapes "/") so that
    # it cannot break out of its own query parameter.
    url = (
        f"{HF_DATASETS_SERVER}/first-rows"
        f"?dataset={requests.utils.quote(dataset, safe='')}"
        f"&config={requests.utils.quote(config, safe='')}"
        f"&split={requests.utils.quote(split, safe='')}"
    )
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    return r.json()
|
|
| def upload_csv_delta_to_dataset_repo( |
| dataset_repo_id: str, |
| local_csv_path: str, |
| target_config: str, |
| token: Optional[str] = None, |
| ) -> HFUploadResult: |
| """ |