# pcai / sync.py
# Web-viewer header residue: "abc1181's picture"
# Last commit message: "Update sync.py"
# Commit: 235aef6 (verified)
import os
import time
import logging
from huggingface_hub import CommitScheduler, HfApi
from pathlib import Path
# Paperclip data sync script.
#
# Reads the target dataset repo and token from the environment, makes sure the
# dataset repo exists on the Hugging Face Hub, then starts a CommitScheduler
# that periodically pushes DATA_DIR to the "paperclip_app" folder of that repo.
logging.basicConfig(level=logging.INFO)

REPO_ID = os.environ.get("DATASET_REPO_ID")
TOKEN = os.environ.get("HF_TOKEN")
DATA_DIR = Path("/app/data/paperclip_app")

if not REPO_ID or not TOKEN:
    # Nothing can be synced without both secrets. The process idles forever
    # instead of exiting (presumably so a supervisor does not restart-loop
    # it -- confirm against the deployment setup).
    logging.error("Missing Secrets. Sync disabled.")
    while True:
        time.sleep(3600)

DATA_DIR.mkdir(parents=True, exist_ok=True)

# Check whether the dataset repository is reachable; create it if the lookup fails.
api = HfApi(token=TOKEN)
try:
    api.repo_info(repo_id=REPO_ID, repo_type="dataset")
except Exception as exc:
    # Catch Exception rather than using a bare ``except:`` so that
    # KeyboardInterrupt/SystemExit still propagate. Any lookup failure falls
    # through to create_repo, which is a no-op for an existing repo
    # (exist_ok=True) and raises if the Hub is genuinely unreachable.
    logging.info(f"Dataset not found. Attempting to create {REPO_ID}...")
    logging.info("repo_info lookup failed with: %r", exc)
    api.create_repo(repo_id=REPO_ID, repo_type="dataset", private=True, exist_ok=True)
else:
    logging.info(f"Connected to existing dataset: {REPO_ID}")

scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type="dataset",
    folder_path=DATA_DIR,
    path_in_repo="paperclip_app",
    every=5,  # commit interval; in minutes per the CommitScheduler documentation
    token=TOKEN,
)
logging.info("Paperclip Sync scheduler active.")

# Keep the main thread alive; the scheduler performs its uploads in the
# background (per the huggingface_hub CommitScheduler documentation).
while True:
    time.sleep(60)