"""Background sync of the Paperclip data folder to a Hugging Face dataset.

Reads the target dataset id from ``DATASET_REPO_ID`` and the access token
from ``HF_TOKEN``. When both are present, the script makes sure the dataset
repository exists (creating it as a private dataset if the lookup fails) and
starts a ``CommitScheduler`` that uploads ``/app/data/paperclip_app`` to the
``paperclip_app`` folder of that repository. The process then stays alive
indefinitely so the scheduler can keep running.
"""
import logging
import os
import time
from pathlib import Path

from huggingface_hub import CommitScheduler, HfApi

logging.basicConfig(level=logging.INFO)

REPO_ID = os.environ.get("DATASET_REPO_ID")
TOKEN = os.environ.get("HF_TOKEN")
DATA_DIR = Path("/app/data/paperclip_app")

if not REPO_ID or not TOKEN:
    logging.error("Missing Secrets. Sync disabled.")
    # Deliberately idle forever instead of exiting: the process stays up,
    # with sync disabled, rather than terminating.
    while True:
        time.sleep(3600)

DATA_DIR.mkdir(parents=True, exist_ok=True)

# Silent check for the repository
api = HfApi(token=TOKEN)
try:
    api.repo_info(repo_id=REPO_ID, repo_type="dataset")
    logging.info("Connected to existing dataset: %s", REPO_ID)
except Exception as exc:
    # Best-effort: any lookup failure falls through to a create attempt.
    # `Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit
    # propagate, and the reason is logged instead of being discarded.
    logging.info("Dataset lookup failed: %s", exc)
    logging.info("Dataset not found. Attempting to create %s...", REPO_ID)
    api.create_repo(
        repo_id=REPO_ID,
        repo_type="dataset",
        private=True,
        exist_ok=True,
    )

# Kept in a module-level name so the scheduler object is not garbage
# collected while the process is running.
scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type="dataset",
    folder_path=DATA_DIR,
    path_in_repo="paperclip_app",
    every=5,  # NOTE(review): interval in minutes per huggingface_hub docs — confirm
    token=TOKEN,
)

logging.info("Paperclip Sync scheduler active.")

# Keep the main thread alive; the scheduler is expected to do its uploads in
# the background while this loop sleeps.
while True:
    time.sleep(60)