"""Periodically sync the Paperclip data directory to a Hugging Face dataset repo.

Reads ``DATASET_REPO_ID`` and ``HF_TOKEN`` from the environment, makes sure the
target dataset repository exists (creating it as private when it cannot be
found), then starts a ``CommitScheduler`` that uploads ``DATA_DIR`` under
``paperclip_app/`` in the repo. The process then stays alive indefinitely.
"""
import logging
import os
import time
from pathlib import Path

from huggingface_hub import CommitScheduler, HfApi

logging.basicConfig(level=logging.INFO)

REPO_ID = os.environ.get("DATASET_REPO_ID")
TOKEN = os.environ.get("HF_TOKEN")
DATA_DIR = Path("/app/data/paperclip_app")

if not REPO_ID or not TOKEN:
    logging.error("Sync disabled: Missing HF_TOKEN or DATASET_REPO_ID")
    # Idle forever instead of exiting; presumably so a process supervisor does
    # not restart-loop this script when sync is simply not configured.
    while True:
        time.sleep(3600)

api = HfApi(token=TOKEN)

try:
    api.repo_info(repo_id=REPO_ID, repo_type="dataset")
except Exception as exc:
    # Best-effort: any lookup failure falls back to creating the repo, but the
    # reason is logged so auth/network problems are not silently hidden.
    logging.warning(
        "Could not fetch dataset repo info (%s); attempting to create it.", exc
    )
    api.create_repo(
        repo_id=REPO_ID,
        repo_type="dataset",
        private=True,
        exist_ok=True,
    )

scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type="dataset",
    folder_path=DATA_DIR,
    path_in_repo="paperclip_app",
    every=5,  # commit interval; minutes per the huggingface_hub documentation
    token=TOKEN,
    # Skip the default instance's database and log directories.
    ignore_patterns=["instances/default/db/*", "instances/default/logs/*"],
)

logging.info("Paperclip sync scheduler active.")

# Keep the main thread alive; the scheduler is expected to do its uploads in
# the background (see CommitScheduler docs).
while True:
    time.sleep(60)