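# Page residue from the Hugging Face file viewer (not part of the module):
#   committer avatar: "PeacebinfLow's picture"
#   last commit: "Update hf_api.py" (94de8b5, verified)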
import os
import time
import requests
from dataclasses import dataclass
from typing import Dict, Any, Optional, List, Tuple
from huggingface_hub import HfApi
# Base URL of the datasets server; endpoint paths such as "/splits" and
# "/first-rows" are appended directly, so it must not end with a slash or
# contain any whitespace.
HF_DATASETS_SERVER = "https://datasets-server.huggingface.co"
@dataclass
class HFUploadResult:
    """Record returned by ``upload_csv_delta_to_dataset_repo``.

    The first three fields are required; ``commit_url`` is optional and
    defaults to ``None``.
    """

    # Identifier of the repository (presumably the target dataset repo —
    # confirm against the upload function).
    repo_id: str
    # Path of the file inside the repository.
    path_in_repo: str
    # Commit message associated with the upload.
    commit_message: str
    # URL of the resulting commit, when one is available.
    commit_url: Optional[str] = None
def require_token() -> str:
    """Return the ``HF_TOKEN`` environment variable, stripped of whitespace.

    Returns:
        The non-empty token string.

    Raises:
        RuntimeError: If ``HF_TOKEN`` is unset, empty, or whitespace-only.
    """
    # Default to "" so an unset variable and a blank one take the same path.
    token = os.getenv("HF_TOKEN", "").strip()
    if not token:
        raise RuntimeError(
            "Missing HF_TOKEN. Set it in Space Secrets: Settings → Secrets → New secret → Name=HF_TOKEN."
        )
    return token
def get_splits(dataset: str, token: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the ``/splits`` response for a dataset from the datasets server.

    Args:
        dataset: Dataset identifier. It is percent-encoded in full
            (``safe=''``), so a ``/`` in the name is sent as ``%2F``.
        token: Optional bearer token. When truthy it is sent in the
            ``Authorization`` header; otherwise no auth header is sent.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    headers = {}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    url = f"{HF_DATASETS_SERVER}/splits?dataset={requests.utils.quote(dataset, safe='')}"
    # Bounded timeout so a stalled server cannot hang the caller indefinitely.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    return r.json()
def get_first_rows(dataset: str, config: str, split: str, token: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the ``/first-rows`` response for one dataset/config/split.

    Args:
        dataset: Dataset identifier.
        config: Configuration name within the dataset.
        split: Split name within the configuration.
        token: Optional bearer token. When truthy it is sent in the
            ``Authorization`` header; otherwise no auth header is sent.

    Each of ``dataset``, ``config`` and ``split`` is percent-encoded in
    full (``safe=''``) before being placed in the query string.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    headers = {}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    url = (
        f"{HF_DATASETS_SERVER}/first-rows"
        f"?dataset={requests.utils.quote(dataset, safe='')}"
        f"&config={requests.utils.quote(config, safe='')}"
        f"&split={requests.utils.quote(split, safe='')}"
    )
    # Bounded timeout so a stalled server cannot hang the caller indefinitely.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    return r.json()
def upload_csv_delta_to_dataset_repo(
dataset_repo_id: str,
local_csv_path: str,
target_config: str,
token: Optional[str] = None,
) -> HFUploadResult:
"""