pcai / pull.py
abc1181's picture
Create pull.py
3045bc4 verified
import os
import sys
from huggingface_hub import HfApi, snapshot_download
from pathlib import Path
# Connection settings are read from the process environment; either may be
# absent, in which case os.environ.get() yields None.
REPO_ID = os.environ.get("DATASET_REPO_ID")
TOKEN = os.environ.get("HF_TOKEN")

# Only this subfolder is synchronised with the dataset repository.
TARGET_DIR = Path("/app/data/paperclip_app")

# Without both the repository id and the token there is nothing to pull.
# Exit with status 0 so a missing configuration is not reported as a failure.
if not (REPO_ID and TOKEN):
    print("Secrets not set. Skipping download.")
    sys.exit(0)
# Restore previously saved Paperclip data from the dataset repository.
print(f"Checking for existing Paperclip data in {REPO_ID}...")
try:
    # Fetch only the TARGET_DIR subfolder. The download root and the filter
    # pattern are both derived from TARGET_DIR so the location is defined in
    # exactly one place instead of being repeated as string literals.
    snapshot_download(
        repo_id=REPO_ID,
        repo_type="dataset",
        local_dir=str(TARGET_DIR.parent),
        allow_patterns=f"{TARGET_DIR.name}/*",
        token=TOKEN,
    )
except Exception as e:
    # Deliberate best-effort boundary: any failure is reported and the script
    # finishes normally so that startup can continue with an empty data folder.
    print(f"No existing paperclip data found (Starting fresh): {e}")
else:
    # A successful call does not imply that anything was downloaded: when no
    # file in the repository matches the pattern the call still returns
    # normally. Inspect the target folder before reporting success.
    if TARGET_DIR.is_dir() and any(TARGET_DIR.iterdir()):
        print("Download complete.")
    else:
        print("No existing paperclip data found (Starting fresh).")