PeacebinfLow committed on
Commit
94de8b5
·
verified ·
1 Parent(s): 4a72af1

Update hf_api.py

Browse files
Files changed (1) hide show
  1. hf_api.py +50 -28
hf_api.py CHANGED
@@ -1,34 +1,56 @@
1
- ```py
2
  import os
 
 
 
 
 
3
  from huggingface_hub import HfApi
4
 
5
- def require_token(env_name: str = "HF_TOKEN") -> str:
6
- token = os.getenv(env_name)
7
- if not token:
8
- raise RuntimeError(
9
- f"Missing {env_name}. Set it in your shell (export {env_name}=...) "
10
- f"or as a Hugging Face Space Secret named {env_name}."
11
- )
12
- return token
 
 
 
 
 
 
 
 
 
13
 
14
- def api() -> HfApi:
15
- return HfApi(token=require_token())
 
 
 
 
 
 
16
 
17
- def upload_delta_csv(repo_id: str, local_csv_path: str, target_config: str, commit_message: str) -> str:
18
- """
19
- Uploads a CSV delta into: data/uploads/<target_config>_<timestamp>.csv
20
- """
21
- import datetime
22
- ts = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S")
23
- filename = f"{target_config}_{ts}.csv"
24
- path_in_repo = f"data/uploads/{filename}"
 
 
 
 
 
25
 
26
- a = api()
27
- a.upload_file(
28
- path_or_fileobj=local_csv_path,
29
- path_in_repo=path_in_repo,
30
- repo_id=repo_id,
31
- repo_type="dataset",
32
- commit_message=commit_message or f"Upload delta CSV for {target_config}: {filename}",
33
- )
34
- return path_in_repo
 
 
1
  import os
2
+ import time
3
+ import requests
4
+ from dataclasses import dataclass
5
+ from typing import Dict, Any, Optional, List, Tuple
6
+
7
  from huggingface_hub import HfApi
8
 
9
# Base URL of the Hugging Face datasets-server API, without a trailing slash.
# It must stay on a single line: a line break inside the quotes is a syntax
# error and would otherwise end up inside every request URL built from it.
HF_DATASETS_SERVER = "https://datasets-server.huggingface.co"
11
+
12
@dataclass
class HFUploadResult:
    """Plain record returned after uploading a file to a Hub repository.

    Field order is part of the interface: the first three fields are
    required and may be passed positionally; ``commit_url`` is optional.
    """

    # Identifier of the repository the upload targeted.
    repo_id: str
    # Location of the uploaded file inside that repository.
    path_in_repo: str
    # Commit message associated with the upload.
    commit_message: str
    # URL of the resulting commit; stays None when no URL is supplied.
    commit_url: Optional[str] = None
18
+
19
def require_token(env_name: str = "HF_TOKEN") -> str:
    """Return the access token stored in the environment variable *env_name*.

    Surrounding whitespace is stripped before the check, so a variable that
    is unset, empty, or whitespace-only is treated as missing.

    Args:
        env_name: Name of the environment variable to read. Defaults to
            ``"HF_TOKEN"``, which keeps zero-argument calls unchanged.

    Returns:
        The stripped, non-empty token string.

    Raises:
        RuntimeError: If the variable holds no usable token.
    """
    token = os.getenv(env_name, "").strip()
    if not token:
        raise RuntimeError(
            f"Missing {env_name}. Set it in Space Secrets: "
            f"Settings → Secrets → New secret → Name={env_name}."
        )
    return token
26
 
27
def get_splits(dataset: str, token: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the ``/splits`` response of the datasets server for *dataset*.

    Args:
        dataset: Dataset identifier; it is fully percent-encoded (including
            ``/``) before being placed in the query string.
        token: Optional access token. When truthy it is sent as an
            ``Authorization: Bearer`` header; otherwise no auth header is sent.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Local import of the documented stdlib function, so the block does not
    # depend on the `requests.utils.quote` re-export.
    from urllib.parse import quote

    headers = {"Authorization": f"Bearer {token}"} if token else {}
    url = f"{HF_DATASETS_SERVER}/splits?dataset={quote(dataset, safe='')}"
    # Explicit timeout so a stalled connection cannot block indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()
35
 
36
def get_first_rows(dataset: str, config: str, split: str, token: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the ``/first-rows`` response of the datasets server.

    Args:
        dataset: Dataset identifier.
        config: Configuration name within the dataset.
        split: Split name within the configuration.
        token: Optional access token. When truthy it is sent as an
            ``Authorization: Bearer`` header; otherwise no auth header is sent.

    All three identifiers are fully percent-encoded (including ``/``) before
    being placed in the query string.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Local import of the documented stdlib function, so the block does not
    # depend on the `requests.utils.quote` re-export.
    from urllib.parse import quote

    headers = {"Authorization": f"Bearer {token}"} if token else {}
    url = (
        f"{HF_DATASETS_SERVER}/first-rows"
        f"?dataset={quote(dataset, safe='')}"
        f"&config={quote(config, safe='')}"
        f"&split={quote(split, safe='')}"
    )
    # Explicit timeout so a stalled connection cannot block indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()
49
 
50
+ def upload_csv_delta_to_dataset_repo(
51
+ dataset_repo_id: str,
52
+ local_csv_path: str,
53
+ target_config: str,
54
+ token: Optional[str] = None,
55
+ ) -> HFUploadResult:
56
+ """