"""Continuously back up the Paperclip data directory to a Hugging Face dataset.

Reads the target repo and token from the ``DATASET_REPO_ID`` and ``HF_TOKEN``
environment variables, makes sure the dataset repo exists, then starts a
``CommitScheduler`` over ``DATA_DIR`` and keeps the process alive.
"""
import logging
import os
import time
from pathlib import Path

from huggingface_hub import CommitScheduler, HfApi

logging.basicConfig(level=logging.INFO)

# Sync target and credentials come from the environment; either may be unset.
REPO_ID = os.environ.get("DATASET_REPO_ID")
TOKEN = os.environ.get("HF_TOKEN")
DATA_DIR = Path("/app/data/paperclip_app")

if not REPO_ID or not TOKEN:
    logging.error("Missing Secrets. Sync disabled.")
    # Idle forever instead of exiting.
    # NOTE(review): presumably this avoids a restart loop under a process
    # supervisor -- confirm against the deployment setup.
    while True:
        time.sleep(3600)

# The scheduler needs an existing folder to watch.
DATA_DIR.mkdir(parents=True, exist_ok=True)

api = HfApi(token=TOKEN)
try:
    api.repo_info(repo_id=REPO_ID, repo_type="dataset")
except Exception as exc:
    # Catch Exception rather than using a bare ``except`` so KeyboardInterrupt
    # and SystemExit still propagate. Any lookup failure falls through to a
    # best-effort create, which is harmless if the repo already exists
    # (``exist_ok=True``).
    logging.info(f"Dataset not found. Attempting to create {REPO_ID}...")
    logging.info("repo_info lookup failed with: %r", exc)
    api.create_repo(
        repo_id=REPO_ID,
        repo_type="dataset",
        private=True,
        exist_ok=True,
    )
else:
    logging.info(f"Connected to existing dataset: {REPO_ID}")

# Periodically commit DATA_DIR into the "paperclip_app" folder of the repo.
# Per the huggingface_hub docs, ``every`` is expressed in minutes.
scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type="dataset",
    folder_path=DATA_DIR,
    path_in_repo="paperclip_app",
    every=5,
    token=TOKEN,
)

logging.info("Paperclip Sync scheduler active.")
# Keep the main thread alive; the script has nothing else to do after
# starting the scheduler.
while True:
    time.sleep(60)
|
|