paperclip / sync.py
abc1181's picture
Update sync.py
b519353 verified
raw
history blame contribute delete
935 Bytes
import logging
import os
import time
from pathlib import Path

from huggingface_hub import CommitScheduler, HfApi
from huggingface_hub.utils import RepositoryNotFoundError
# Runtime configuration, read once from the environment at startup.
# Each value is None when its environment variable is unset.
REPO_ID = os.getenv("DATASET_REPO_ID")
TOKEN = os.getenv("HF_TOKEN")

# Local directory whose contents are synced.
DATA_DIR = Path("/app/data") / "paperclip_app"

# Route INFO-level (and more severe) messages through the root logger.
logging.basicConfig(level=logging.INFO)
# Syncing needs both the repo id and the token. If either is missing or empty,
# report it once and then idle forever instead of exiting.
# NOTE(review): presumably the process is kept alive so a supervisor does not
# restart it in a tight loop -- confirm against the deployment setup.
if not (REPO_ID and TOKEN):
    logging.error("Sync disabled: Missing HF_TOKEN or DATASET_REPO_ID")
    while True:
        time.sleep(3600)
# Make sure the target dataset repo exists before the scheduler starts pushing.
api = HfApi(token=TOKEN)
try:
    api.repo_info(repo_id=REPO_ID, repo_type="dataset")
except RepositoryNotFoundError:
    # Only a "repo not found" answer should trigger creation. Any other
    # failure (network outage, server error, ...) now propagates with its
    # real cause instead of being hidden behind a follow-up create_repo call.
    logging.info("Dataset repo %s not found; creating it as private.", REPO_ID)
    api.create_repo(repo_id=REPO_ID, repo_type="dataset", private=True, exist_ok=True)
# Glob patterns for files that must never be uploaded: the default instance's
# database and log directories.
# NOTE(review): presumably matched against paths relative to DATA_DIR -- confirm.
skipped_paths = ["instances/default/db/*", "instances/default/logs/*"]

# Periodically commit DATA_DIR into the "paperclip_app" folder of the dataset
# repo. `every` is the commit interval; the huggingface_hub documentation
# defines it in minutes.
scheduler = CommitScheduler(
    token=TOKEN,
    repo_id=REPO_ID,
    repo_type="dataset",
    folder_path=DATA_DIR,
    path_in_repo="paperclip_app",
    ignore_patterns=skipped_paths,
    every=5,
)
logging.info("Paperclip sync scheduler active.")

# Nothing else runs in this script after the scheduler is created, so sleep in
# a loop to keep the process from exiting.
while True:
    time.sleep(60)