Spaces:
Sleeping
Sleeping
File size: 2,334 Bytes
0f77bc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# tools/upload_to_hf.py
import os
from huggingface_hub import HfApi, create_repo
import json
from pathlib import Path
def upload_dataset():
    """Upload the SAP dataset and a small metadata file to Hugging Face.

    Credentials are read from the ``HF_WRITE_TOKEN`` and ``HF_USERNAME``
    environment variables (the error messages indicate these are expected
    to come from GitHub secrets). The target is the dataset repository
    ``<HF_USERNAME>/sap-dataset``, which is created if it does not exist.

    If ``data/sap_dataset.json`` exists it is uploaded as
    ``sap_dataset.json``; a metadata file is then written to
    ``data/dataset_card.json`` and uploaded as ``dataset_card.json``.
    If the dataset file is missing, a message is printed and nothing is
    uploaded.

    Raises:
        ValueError: If ``HF_WRITE_TOKEN`` or ``HF_USERNAME`` is unset or
            empty.
    """
    # Validate credentials before touching the network.
    hf_token = os.getenv("HF_WRITE_TOKEN")
    hf_username = os.getenv("HF_USERNAME")
    if not hf_token:
        raise ValueError("❌ HF_WRITE_TOKEN secret not found in GitHub")
    if not hf_username:
        raise ValueError("❌ HF_USERNAME secret not found in GitHub")

    # Build repo ID from username.
    hf_repo = f"{hf_username}/sap-dataset"
    print(f"🚀 Uploading to Hugging Face: {hf_repo}")

    api = HfApi(token=hf_token)

    # Create the repo if it doesn't exist. This step is deliberately
    # best-effort: a failure is reported and the upload is still attempted,
    # where any real problem will surface as an error from upload_file.
    try:
        create_repo(
            repo_id=hf_repo,
            repo_type="dataset",
            exist_ok=True,
            token=hf_token,
        )
    except Exception as e:
        print(f"⚠️ Note: {e}")
    else:
        print("✅ Repository ready")

    # Guard clause: nothing to upload without the dataset file.
    dataset_path = "data/sap_dataset.json"
    if not Path(dataset_path).exists():
        print(f"❌ Dataset file {dataset_path} not found")
        return

    api.upload_file(
        path_or_fileobj=dataset_path,
        path_in_repo="sap_dataset.json",
        repo_id=hf_repo,
        repo_type="dataset",
        token=hf_token,
    )
    print(f"✅ Dataset uploaded successfully to {hf_repo}")

    # Also upload a metadata file describing the dataset.
    # NOTE(review): the Hub appears to render dataset cards from README.md
    # front matter; this JSON is uploaded as a plain file - confirm that is
    # the intent.
    dataset_card = {
        "dataset_name": "SAP Knowledge Base",
        "description": "Multi-source SAP dataset (Community, StackOverflow, GitHub, Dev.to, Medium, SAP Developers tutorials)",
        "language": "en",
        "task_categories": ["question-answering", "text-generation"],
        "tags": ["sap", "basis", "abap", "hana", "btp", "fiori", "ui5", "qa"],
    }
    card_path = "data/dataset_card.json"
    # Explicit encoding so the written file does not depend on the locale.
    with open(card_path, "w", encoding="utf-8") as f:
        json.dump(dataset_card, f, indent=2)

    api.upload_file(
        path_or_fileobj=card_path,
        path_in_repo="dataset_card.json",
        repo_id=hf_repo,
        repo_type="dataset",
        token=hf_token,
    )
    print("✅ Dataset card uploaded")
# Script entry point: run the upload only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    upload_dataset()
|