"""Keep a local data directory in sync with a Hugging Face Dataset around a web UI run.

Startup order when run as a script:
1. Download the Dataset snapshot into LOCAL_DIR.
2. Start a daemon thread that uploads LOCAL_DIR back to the Dataset periodically.
3. Run ``webui.py`` as a child process and wait for it to exit.
4. Make one final backup attempt and exit with the child's return code.
"""
import os
import subprocess
import sys
import threading
import time

from huggingface_hub import HfApi, snapshot_download

# Configuration (read once from the environment at import time).
TOKEN = os.getenv("HF_TOKEN")
DATASET_ID = os.getenv("DATASET_ID")
LOCAL_DIR = "data"  # Directory where the project stores its accounts and database

api = HfApi(token=TOKEN)

# Serializes uploads so the periodic backup and the final backup on shutdown
# never push to the Dataset at the same time.
_backup_lock = threading.Lock()


def download_data() -> None:
    """Download the Dataset contents into LOCAL_DIR.

    Does nothing (apart from printing a notice) when DATASET_ID is not set.
    Any download error is printed and swallowed so that startup continues.
    """
    if not DATASET_ID:
        print("未配置 DATASET_ID,跳过下载")
        return
    try:
        print(f"正在从 {DATASET_ID} 同步数据...")
        snapshot_download(
            repo_id=DATASET_ID,
            repo_type="dataset",
            local_dir=LOCAL_DIR,
            token=TOKEN,
        )
        print("同步完成")
    except Exception as e:
        # Best effort: the failure is reported but must not prevent the web UI
        # from starting (the message notes the repo may simply be empty).
        print(f"下载失败(可能是首次运行,仓库为空): {e}")


def _backup_once() -> None:
    """Upload LOCAL_DIR to the Dataset once, printing success or failure."""
    if not DATASET_ID:
        return
    with _backup_lock:
        try:
            api.upload_folder(
                folder_path=LOCAL_DIR,
                repo_id=DATASET_ID,
                repo_type="dataset",
            )
            print("备份成功至 Dataset")
        except Exception as e:
            # Best effort: a failed backup is reported and retried on the next cycle.
            print(f"备份失败: {e}")


def upload_data(interval: float = 300) -> None:
    """Back up LOCAL_DIR to the Dataset every ``interval`` seconds, forever.

    Intended to run in a daemon thread. Returns immediately when DATASET_ID is
    not set, since there is nothing to upload to.

    Args:
        interval: Seconds to sleep before each backup (default 300, i.e. 5 minutes).
    """
    if not DATASET_ID:
        return
    while True:
        time.sleep(interval)
        _backup_once()


def main() -> int:
    """Sync data, start the periodic backup, run the web UI, and return its exit code."""
    # 1. Download before starting.
    download_data()

    # 2. Start the periodic backup thread. It is a daemon, so it only lives as
    #    long as this main thread does.
    backup_thread = threading.Thread(target=upload_data, daemon=True)
    backup_thread.start()

    # 3. Run the original web UI (keep this command consistent with the
    #    Dockerfile start command). subprocess.run blocks until the web UI
    #    exits; that blocking is what keeps the backup thread alive.
    #    sys.executable launches the child with the same interpreter as this
    #    script instead of whatever "python" resolves to on PATH.
    try:
        result = subprocess.run(
            [sys.executable or "python", "webui.py", "--host", "0.0.0.0", "--port", "7860"]
        )
    finally:
        # Push changes made since the last periodic backup. This runs when the
        # child returns or an exception (e.g. KeyboardInterrupt) reaches the
        # main thread; a hard kill of this process skips it.
        _backup_once()
    return result.returncode


if __name__ == "__main__":
    sys.exit(main())