File size: 1,038 Bytes
c3674d2
 
 
235aef6
c3674d2
 
 
 
 
 
28871c3
c3674d2
 
42c02f6
 
c3674d2
 
 
235aef6
 
 
 
 
 
 
 
 
c3674d2
 
 
 
235aef6
42c02f6
c3674d2
 
 
28871c3
c3674d2
42c02f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import time
import logging
from huggingface_hub import CommitScheduler, HfApi
from pathlib import Path

# Route INFO-and-above records through the root logger.
logging.basicConfig(level=logging.INFO)

# Sync configuration. The repo id and token come from the environment and are
# None when the corresponding variable is unset.
REPO_ID = os.getenv("DATASET_REPO_ID")
TOKEN = os.getenv("HF_TOKEN")
DATA_DIR = Path("/app/data/paperclip_app")

if not (REPO_ID and TOKEN):
    logging.error("Missing Secrets. Sync disabled.")
    # Park the process forever rather than exiting.
    # NOTE(review): presumably this keeps a supervisor from restart-looping
    # the sync process when secrets are absent -- confirm.
    while True:
        time.sleep(3600)

# Make sure the local data folder exists (no error if it already does).
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Probe the dataset repository; when the probe fails, fall back to creating
# it as a private dataset (``exist_ok=True`` makes the create call a no-op if
# the repo turns out to exist after all).
api = HfApi(token=TOKEN)
try:
    api.repo_info(repo_id=REPO_ID, repo_type="dataset")
except Exception as err:
    # ``Exception`` instead of a bare ``except:`` so KeyboardInterrupt and
    # SystemExit still propagate. The probe can fail for reasons other than
    # a missing repo, so record the real cause before the best-effort create.
    logging.warning("Dataset lookup failed: %s", err)
    logging.info(f"Dataset not found. Attempting to create {REPO_ID}...")
    api.create_repo(repo_id=REPO_ID, repo_type="dataset", private=True, exist_ok=True)
else:
    logging.info(f"Connected to existing dataset: {REPO_ID}")

# Scheduler that commits the contents of DATA_DIR into the "paperclip_app"
# folder of the dataset repo. ``every`` is the commit interval (minutes, per
# the huggingface_hub CommitScheduler documentation).
scheduler = CommitScheduler(
    token=TOKEN,
    repo_id=REPO_ID,
    repo_type="dataset",
    path_in_repo="paperclip_app",
    folder_path=DATA_DIR,
    every=5,
)

logging.info("Paperclip Sync scheduler active.")

# Keep the main thread alive indefinitely; nothing else happens here.
# NOTE(review): the scheduler is expected to do its commits in the
# background while this loop idles -- confirm against the library docs.
while True:
    time.sleep(60)