#!/usr/bin/env python3
"""Periodic backup daemon: uploads /data to a Hugging Face dataset repo.

Reads HF_TOKEN and OPENCODE_DATASET_REPO from the environment, then every
SYNC_INTERVAL seconds uploads the contents of LOCAL_DIR into the
TARGET_SUBDIR folder of the dataset repository.
"""

import logging
import os
import sys
import time

from huggingface_hub import HfApi

# Logging configuration.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration from environment variables.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO = os.environ.get("OPENCODE_DATASET_REPO")  # e.g. "jamesw853/opencode-data"
LOCAL_DIR = "/data"
SYNC_INTERVAL = 180  # seconds

# Target subdirectory inside the dataset repo. Note: the directory name
# contains a space — supported by the Hugging Face Hub.
TARGET_SUBDIR = "opencode backup"

if not HF_TOKEN:
    logger.error("HF_TOKEN environment variable not set. Cannot upload.")
    sys.exit(1)
if not DATASET_REPO:
    logger.error("OPENCODE_DATASET_REPO environment variable not set.")
    sys.exit(1)

api = HfApi(token=HF_TOKEN)


def upload_data():
    """Upload the LOCAL_DIR directory into TARGET_SUBDIR of the dataset repo.

    Best-effort: any failure is logged (with traceback) and swallowed so the
    daemon's loop keeps running.
    """
    try:
        if not os.path.isdir(LOCAL_DIR):
            logger.warning("%s does not exist, skipping upload.", LOCAL_DIR)
            return

        # Enumerate the files to upload (for logging only).
        files = []
        for root, _, filenames in os.walk(LOCAL_DIR):
            for f in filenames:
                files.append(os.path.relpath(os.path.join(root, f), LOCAL_DIR))
        logger.info(
            "Found %d file(s) to upload: %s%s",
            len(files), files[:5], '...' if len(files) > 5 else '',
        )

        # Upload the whole folder into TARGET_SUBDIR of the dataset.
        # Use api.upload_folder (not the module-level function) so the
        # HF_TOKEN passed to HfApi is actually used for authentication;
        # the original called the bare upload_folder, which never received
        # the token and relied on ambient credentials.
        api.upload_folder(
            repo_id=DATASET_REPO,
            folder_path=LOCAL_DIR,
            path_in_repo=TARGET_SUBDIR,  # files land under /opencode backup/ in the dataset
            repo_type="dataset",
            commit_message=f"Auto backup to {TARGET_SUBDIR} at {time.strftime('%Y-%m-%d %H:%M:%S')}",
        )
        logger.info("Successfully uploaded %s to %s/%s", LOCAL_DIR, DATASET_REPO, TARGET_SUBDIR)
    except Exception:
        # logger.exception preserves the traceback, which the original
        # logger.error(f"...{e}") discarded.
        logger.exception("Upload failed")


def main():
    """Run one immediate upload, then loop forever on SYNC_INTERVAL."""
    logger.info(
        "Starting backup daemon: interval=%ss, dataset=%s, target_dir=%s",
        SYNC_INTERVAL, DATASET_REPO, TARGET_SUBDIR,
    )
    # Perform an initial upload right away on startup.
    upload_data()
    while True:
        time.sleep(SYNC_INTERVAL)
        upload_data()


if __name__ == "__main__":
    main()