File size: 1,702 Bytes
b2e8697 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | import os
import time
import subprocess
import threading
from huggingface_hub import HfApi, snapshot_download
# Configuration
# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is unset).
TOKEN = os.getenv("HF_TOKEN")
# Id of the Dataset repo used as backup storage, read from the DATASET_ID
# environment variable (None when unset; download/upload are then skipped).
DATASET_ID = os.getenv("DATASET_ID")
LOCAL_DIR = "data" # directory where the project stores its accounts and database
# Shared Hub client, created at import time and used by upload_data().
api = HfApi(token=TOKEN)
def download_data():
    """Download the Dataset repo contents into the local data directory.

    When ``DATASET_ID`` is not configured, a notice is printed and nothing
    is downloaded. Any exception raised while syncing is caught and printed
    rather than propagated, so the caller always continues.
    """
    if DATASET_ID:
        try:
            print(f"正在从 {DATASET_ID} 同步数据...")
            # Gather the request arguments first, then issue a single call.
            snapshot_args = {
                "repo_id": DATASET_ID,
                "repo_type": "dataset",
                "local_dir": LOCAL_DIR,
                "token": TOKEN,
            }
            snapshot_download(**snapshot_args)
            print("同步完成")
        except Exception as e:
            # Best-effort: a failure here (e.g. an empty repo on first run)
            # is reported but does not abort startup.
            print(f"下载失败(可能是首次运行,仓库为空): {e}")
    else:
        print("未配置 DATASET_ID,跳过下载")
def upload_data(interval=300):
    """Periodically upload the local data directory to the Dataset repo.

    Loops forever, so it is meant to be run in a background thread. Each
    cycle sleeps for ``interval`` seconds and then uploads ``LOCAL_DIR`` to
    ``DATASET_ID`` through the shared ``api`` client. A failed upload is
    printed and retried on the next cycle instead of ending the loop.

    Args:
        interval: Seconds to wait between backups. Defaults to 300
            (5 minutes), the previously hard-coded value.

    Returns:
        None. Returns immediately when ``DATASET_ID`` is not configured;
        otherwise it never returns.
    """
    if not DATASET_ID:
        # DATASET_ID is fixed at import time, so without it every cycle
        # would be a no-op; stop here instead of waking up forever.
        return

    while True:
        # Sleep first: the data was just downloaded when the thread starts.
        time.sleep(interval)
        try:
            api.upload_folder(
                folder_path=LOCAL_DIR,
                repo_id=DATASET_ID,
                repo_type="dataset",
            )
            print("备份成功至 Dataset")
        except Exception as e:
            # Best-effort backup: report the failure and try again later.
            print(f"备份失败: {e}")
if __name__ == "__main__":
    import sys

    # 1. Restore previously backed-up data before the application starts.
    download_data()

    # 2. Start the periodic backup thread. It is a daemon thread so that it
    #    never keeps this process alive once the web UI has exited.
    backup_thread = threading.Thread(target=upload_data, daemon=True)
    backup_thread.start()

    # 3. Run the original web UI (NOTE: this command must match the start
    #    command in your Dockerfile). subprocess.run() blocks until the web
    #    UI exits, which keeps this process and its backup thread alive for
    #    the lifetime of the UI.
    result = subprocess.run(
        ["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"]
    )

    # Propagate the web UI's exit status so that a crash is visible to
    # whatever supervises this wrapper, instead of always exiting with 0.
    sys.exit(result.returncode)