| |
import logging
import os
import sys
import time

from huggingface_hub import HfApi, upload_folder
|
|
| |
# --- Logging -----------------------------------------------------------------
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Configuration (environment-driven) --------------------------------------
HF_TOKEN = os.environ.get("HF_TOKEN")                    # Hugging Face access token
DATASET_REPO = os.environ.get("OPENCODE_DATASET_REPO")   # target dataset repo id
LOCAL_DIR = "/data"                                      # directory tree to back up
# Seconds between uploads; overridable via the SYNC_INTERVAL env var
# (defaults to the original hard-coded 180).
SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "180"))

# Subdirectory inside the dataset repo that receives the backup.
# NOTE(review): contains a space — valid for repo paths, but confirm it is intentional.
TARGET_SUBDIR = "opencode backup"

# Fail fast with a clear message when required configuration is missing.
# sys.exit is used instead of the bare exit() builtin, which is only
# installed by the `site` module and absent under `python -S` / embedding.
if not HF_TOKEN:
    logger.error("HF_TOKEN environment variable not set. Cannot upload.")
    sys.exit(1)
if not DATASET_REPO:
    logger.error("OPENCODE_DATASET_REPO environment variable not set.")
    sys.exit(1)

# Authenticated API client for the Hugging Face Hub.
api = HfApi(token=HF_TOKEN)
|
|
def upload_data():
    """Upload the contents of LOCAL_DIR to TARGET_SUBDIR of the dataset repo.

    Skips (with a log message) when LOCAL_DIR is missing or empty. Any
    upload error is logged with its full traceback rather than raised, so
    the calling daemon loop keeps running.
    """
    try:
        if not os.path.isdir(LOCAL_DIR):
            logger.warning(f"{LOCAL_DIR} does not exist, skipping upload.")
            return

        # Enumerate files for logging only; upload_folder performs its own walk.
        files = [
            os.path.relpath(os.path.join(root, name), LOCAL_DIR)
            for root, _, filenames in os.walk(LOCAL_DIR)
            for name in filenames
        ]
        if not files:
            # Nothing to back up — avoid an empty commit.
            logger.info(f"{LOCAL_DIR} is empty, skipping upload.")
            return
        logger.info(f"Found {len(files)} file(s) to upload: {files[:5]}{'...' if len(files)>5 else ''}")

        # Use the authenticated client so the token is passed explicitly
        # instead of relying on implicit environment lookup.
        api.upload_folder(
            repo_id=DATASET_REPO,
            folder_path=LOCAL_DIR,
            path_in_repo=TARGET_SUBDIR,
            repo_type="dataset",
            commit_message=f"Auto backup to {TARGET_SUBDIR} at {time.strftime('%Y-%m-%d %H:%M:%S')}"
        )
        logger.info(f"Successfully uploaded {LOCAL_DIR} to {DATASET_REPO}/{TARGET_SUBDIR}")
    except Exception:
        # Log the traceback (logger.error(f"{e}") would drop it) and swallow,
        # keeping the backup daemon alive for the next cycle.
        logger.exception("Upload failed")
|
|
def main():
    """Run the backup daemon: upload immediately, then once per interval, forever."""
    logger.info(f"Starting backup daemon: interval={SYNC_INTERVAL}s, dataset={DATASET_REPO}, target_dir={TARGET_SUBDIR}")

    # Upload-then-sleep produces the same trace as the original
    # prime-then-sleep-first loop: upload, sleep, upload, sleep, ...
    while True:
        upload_data()
        time.sleep(SYNC_INTERVAL)
|
|
# Entry point: start the backup daemon only when run as a script.
if __name__ == "__main__":
    main()
|
|