File size: 2,351 Bytes
98936b8
 
 
 
 
 
00c1284
98936b8
 
 
00c1284
98936b8
00c1284
 
 
 
 
 
98936b8
 
00c1284
98936b8
 
00c1284
98936b8
 
 
 
00c1284
 
98936b8
00c1284
 
98936b8
00c1284
 
 
 
 
 
 
 
 
98936b8
 
00c1284
 
98936b8
00c1284
98936b8
00c1284
98936b8
00c1284
98936b8
00c1284
 
 
 
98936b8
 
00c1284
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3
import os
import time
import logging
from huggingface_hub import HfApi, upload_folder

# Configure logging: timestamped INFO-level records on the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration read from environment variables.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO = os.environ.get("OPENCODE_DATASET_REPO")  # expected to be "jamesw853/opencode-data"
LOCAL_DIR = "/data"
SYNC_INTERVAL = 180  # seconds

# Target subdirectory inside the dataset repository. Note: the name contains a
# space; Hugging Face Hub accepts that, but quote it wherever it is used in a shell.
TARGET_SUBDIR = "opencode backup"   # files are uploaded beneath this subdirectory

# Fail fast when required configuration is missing.
# SystemExit is raised directly instead of calling exit(): the exit() helper is
# injected by the `site` module for interactive use and is not guaranteed to
# exist (e.g. under `python -S` or in frozen/embedded interpreters).
if not HF_TOKEN:
    logger.error("HF_TOKEN environment variable not set. Cannot upload.")
    raise SystemExit(1)
if not DATASET_REPO:
    logger.error("OPENCODE_DATASET_REPO environment variable not set.")
    raise SystemExit(1)

# Hub client authenticated with HF_TOKEN.
api = HfApi(token=HF_TOKEN)

def upload_data():
    """Upload the contents of LOCAL_DIR into TARGET_SUBDIR of the dataset repo.

    The call is a no-op (apart from a log line) when LOCAL_DIR does not exist
    or contains no files. Any exception raised while listing or uploading is
    logged together with its traceback and then swallowed, so a failed upload
    never propagates to the caller.

    Returns:
        None.
    """
    if not os.path.isdir(LOCAL_DIR):
        logger.warning(f"{LOCAL_DIR} does not exist, skipping upload.")
        return

    try:
        # Paths relative to LOCAL_DIR; used for the log line below and to
        # detect an empty folder before contacting the Hub.
        files = [
            os.path.relpath(os.path.join(root, name), LOCAL_DIR)
            for root, _, filenames in os.walk(LOCAL_DIR)
            for name in filenames
        ]
        if not files:
            logger.info(f"No files found in {LOCAL_DIR}, skipping upload.")
            return
        logger.info(f"Found {len(files)} file(s) to upload: {files[:5]}{'...' if len(files)>5 else ''}")

        # Upload the whole folder beneath TARGET_SUBDIR. Going through the
        # module-level `api` client makes the upload use the token that was
        # validated at start-up instead of relying on implicit token lookup.
        api.upload_folder(
            repo_id=DATASET_REPO,
            folder_path=LOCAL_DIR,
            path_in_repo=TARGET_SUBDIR,   # files end up under "<repo>/opencode backup/"
            repo_type="dataset",
            commit_message=f"Auto backup to {TARGET_SUBDIR} at {time.strftime('%Y-%m-%d %H:%M:%S')}",
        )
        logger.info(f"Successfully uploaded {LOCAL_DIR} to {DATASET_REPO}/{TARGET_SUBDIR}")
    except Exception as e:
        # Best-effort: record the failure with its traceback, but do not raise.
        logger.exception(f"Upload failed: {e}")

def main():
    """Run the backup loop forever.

    Logs the effective settings, performs one upload immediately, and then
    repeats the upload after every SYNC_INTERVAL-second pause. Never returns.
    """
    startup_message = f"Starting backup daemon: interval={SYNC_INTERVAL}s, dataset={DATASET_REPO}, target_dir={TARGET_SUBDIR}"
    logger.info(startup_message)
    # Upload first and sleep afterwards, so the first backup runs right at start-up.
    while True:
        upload_data()
        time.sleep(SYNC_INTERVAL)

# Start the backup loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()