File size: 2,654 Bytes
6d12932
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

import os
import argparse
from huggingface_hub import login, HfApi

def upload_to_hub(dataset_path: str, repo_name: str, token: str = None, private: bool = True) -> bool:
    """Upload a local dataset (JSONL) file to the Hugging Face Hub using HfApi.

    The local path is validated before anything else, so an invalid path
    fails fast without logging in or contacting the Hub.

    Args:
        dataset_path: Path to the local .jsonl file.
        repo_name: Name of the repository on HF (e.g., 'username/dataset-name').
        token: HF API Token (optional if logged in via CLI).
        private: Whether the dataset should be private.

    Returns:
        True if the file was uploaded, False if the upload failed (the error
        is printed rather than raised).

    Raises:
        FileNotFoundError: If ``dataset_path`` is not an existing file.
    """
    print(f"📦 Preparing to upload '{dataset_path}' to '{repo_name}'...")

    # 1. Validate the input first: a directory or missing path can never be
    #    uploaded, so do not log in or hit the network for it.
    if not os.path.isfile(dataset_path):
        raise FileNotFoundError(f"Dataset file not found: {dataset_path}")

    # 2. Login if token provided
    if token:
        print("🔑 Logging in to Hugging Face...")
        login(token=token)

    api = HfApi()

    # 3. Create Repo if it doesn't exist
    print(f"🔨 Creating/Checking repository '{repo_name}'...")
    try:
        api.create_repo(repo_id=repo_name, repo_type="dataset", private=private, exist_ok=True)
    except Exception as e:
        # Deliberately best-effort: the repo may already exist under
        # permissions that forbid creation, and the upload below will
        # surface any real problem.
        print(f"⚠️  Repo creation check failed (might already exist or permission issue): {e}")

    # 4. Upload File
    uploaded_name = os.path.basename(dataset_path)
    print("🚀 Uploading file to Hugging Face Hub...")
    try:
        api.upload_file(
            path_or_fileobj=dataset_path,
            path_in_repo=uploaded_name,
            repo_id=repo_name,
            repo_type="dataset",
        )
    except Exception as e:
        print(f"❌ Failed to upload dataset: {e}")
        print("Tip: Check your token permissions and repo name.")
        return False

    print(f"✨ Success! Your dataset is live at: https://huggingface.co/datasets/{repo_name}")
    # Report the name of the file that was actually uploaded, not a fixed one.
    print(f"ℹ️  Note: On Colab, load it using: load_dataset('json', data_files='{uploaded_name}', split='train')")
    return True

if __name__ == "__main__":
    # Command-line entry point: collect the upload options and hand them to
    # upload_to_hub.
    cli = argparse.ArgumentParser(description="Upload Nursing Dataset to Hugging Face")
    cli.add_argument(
        "--file",
        type=str,
        default="fons-relational-care-synthetic-v1.jsonl",
        help="Path to local JSONL file",
    )
    cli.add_argument(
        "--repo",
        type=str,
        required=True,
        help="Target HF Repo (e.g. 'your-username/nursing-sbar-instruct')",
    )
    cli.add_argument(
        "--token",
        type=str,
        help="Hugging Face Write Token (optional)",
    )
    cli.add_argument(
        "--public",
        action="store_true",
        help="Make dataset public (default is private)",
    )

    options = cli.parse_args()

    # --public is an opt-in switch, so the privacy setting is its negation.
    make_private = not options.public
    upload_to_hub(options.file, options.repo, options.token, make_private)