File size: 2,654 Bytes
6d12932 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import os
import argparse
from huggingface_hub import login, HfApi
def upload_to_hub(dataset_path: str, repo_name: str, token: str = None, private: bool = True):
"""
Uploads a local dataset (JSONL) to Hugging Face Hub using HfApi.
Args:
dataset_path: Path to the local .jsonl file.
repo_name: Name of the repository on HF (e.g., 'username/dataset-name').
token: HF API Token (optional if logged in via CLI).
private: Whether the dataset should be private.
"""
print(f"๐ฆ Preparing to upload '{dataset_path}' to '{repo_name}'...")
# 1. Login if token provided
if token:
print("๐ Logging in to Hugging Face...")
login(token=token)
# 2. Check if file exists
if not os.path.exists(dataset_path):
raise FileNotFoundError(f"Dataset file not found: {dataset_path}")
api = HfApi()
# 3. Create Repo if it doesn't exist
try:
print(f"๐จ Creating/Checking repository '{repo_name}'...")
api.create_repo(repo_id=repo_name, repo_type="dataset", private=private, exist_ok=True)
except Exception as e:
print(f"โ ๏ธ Repo creation check failed (might already exist or permission issue): {e}")
# 4. Upload File
print(f"๐ Uploading file to Hugging Face Hub...")
try:
api.upload_file(
path_or_fileobj=dataset_path,
path_in_repo=os.path.basename(dataset_path),
repo_id=repo_name,
repo_type="dataset"
)
print(f"โจ Success! Your dataset is live at: https://huggingface.co/datasets/{repo_name}")
print("โน๏ธ Note: On Colab, load it using: load_dataset('json', data_files='fons-relational-care-synthetic-v1.jsonl', split='train')")
except Exception as e:
print(f"โ Failed to upload dataset: {e}")
print("Tip: Check your token permissions and repo name.")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Upload Nursing Dataset to Hugging Face")
parser.add_argument("--file", type=str, default="fons-relational-care-synthetic-v1.jsonl", help="Path to local JSONL file")
parser.add_argument("--repo", type=str, required=True, help="Target HF Repo (e.g. 'your-username/nursing-sbar-instruct')")
parser.add_argument("--token", type=str, help="Hugging Face Write Token (optional)")
parser.add_argument("--public", action="store_true", help="Make dataset public (default is private)")
args = parser.parse_args()
upload_to_hub(args.file, args.repo, args.token, not args.public)
|