#!/usr/bin/env python3
import os
import time
import logging
from huggingface_hub import HfApi, upload_folder
# Logging setup: timestamped, level-tagged messages.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration read from environment variables.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO = os.environ.get("OPENCODE_DATASET_REPO")  # e.g. "jamesw853/opencode-data"
LOCAL_DIR = "/data"
SYNC_INTERVAL = 180  # seconds between sync attempts

# Destination subdirectory inside the dataset repo.
# NOTE: the name contains a space; the Hugging Face Hub supports this, but
# quote it when referencing it from a shell.
TARGET_SUBDIR = "opencode backup"

# Fail fast when required configuration is missing — nothing can be uploaded
# without a token and a target repo.  `raise SystemExit(1)` is the idiomatic
# script-level exit; the `exit()` builtin is intended for interactive sessions.
if not HF_TOKEN:
    logger.error("HF_TOKEN environment variable not set. Cannot upload.")
    raise SystemExit(1)
if not DATASET_REPO:
    logger.error("OPENCODE_DATASET_REPO environment variable not set.")
    raise SystemExit(1)

# Authenticated Hub client used by upload_data().
api = HfApi(token=HF_TOKEN)
def upload_data():
    """Upload the entire LOCAL_DIR to TARGET_SUBDIR of the dataset repo.

    Best-effort: any failure is logged (with traceback) and swallowed so the
    backup daemon keeps running and retries on the next cycle.
    """
    try:
        if not os.path.isdir(LOCAL_DIR):
            logger.warning(f"{LOCAL_DIR} does not exist, skipping upload.")
            return

        # Enumerate files relative to LOCAL_DIR (used only for logging).
        files = [
            os.path.relpath(os.path.join(root, name), LOCAL_DIR)
            for root, _, filenames in os.walk(LOCAL_DIR)
            for name in filenames
        ]
        logger.info(f"Found {len(files)} file(s) to upload: {files[:5]}{'...' if len(files)>5 else ''}")

        # Upload the whole folder into the TARGET_SUBDIR of the dataset repo.
        # Use the authenticated `api` client (constructed at module level) so
        # the token is passed explicitly instead of relying on huggingface_hub's
        # implicit environment-variable lookup.
        api.upload_folder(
            repo_id=DATASET_REPO,
            folder_path=LOCAL_DIR,
            path_in_repo=TARGET_SUBDIR,  # files land under "<repo>/opencode backup/"
            repo_type="dataset",
            commit_message=f"Auto backup to {TARGET_SUBDIR} at {time.strftime('%Y-%m-%d %H:%M:%S')}"
        )
        logger.info(f"Successfully uploaded {LOCAL_DIR} to {DATASET_REPO}/{TARGET_SUBDIR}")
    except Exception as e:
        # logger.exception records the traceback, making Hub/network failures
        # diagnosable from the daemon's log.
        logger.exception(f"Upload failed: {e}")
def main():
    """Run the backup loop: sync immediately, then once every SYNC_INTERVAL seconds."""
    logger.info(f"Starting backup daemon: interval={SYNC_INTERVAL}s, dataset={DATASET_REPO}, target_dir={TARGET_SUBDIR}")
    # Upload first, sleep after — so the first sync happens right at startup.
    while True:
        upload_data()
        time.sleep(SYNC_INTERVAL)


if __name__ == "__main__":
    main()