# code2 / sync_run.py
# gallyg's picture
# Create sync_run.py
# b2e8697 verified
# raw
# history blame
# 1.7 kB
import os
import time
import subprocess
import threading
from huggingface_hub import HfApi, snapshot_download
# Configuration (read once at import time from the environment)
TOKEN = os.environ.get("HF_TOKEN")
DATASET_ID = os.environ.get("DATASET_ID")
LOCAL_DIR = "data"  # directory where the project stores accounts and the database
api = HfApi(token=TOKEN)
def download_data():
    """Download the Dataset repo contents into the local data directory.

    Does nothing except print a notice when ``DATASET_ID`` is not set.
    Any exception raised during the download is caught and printed rather
    than propagated, so a failed download (the message suggests a first run
    against an empty repo as one likely cause) does not abort the caller.
    """
    if DATASET_ID:
        fetch_options = {
            "repo_id": DATASET_ID,
            "repo_type": "dataset",
            "local_dir": LOCAL_DIR,
            "token": TOKEN,
        }
        try:
            print(f"正在从 {DATASET_ID} 同步数据...")
            snapshot_download(**fetch_options)
            print("同步完成")
        except Exception as err:
            # Best-effort: report the failure and let startup continue.
            print(f"下载失败(可能是首次运行,仓库为空): {err}")
    else:
        print("未配置 DATASET_ID,跳过下载")
def upload_data():
    """Periodically upload the local data directory to the Dataset repo.

    Loops forever and never returns: sleeps for the backup interval, then,
    if ``DATASET_ID`` is set, uploads ``LOCAL_DIR`` via ``api.upload_folder``.
    Upload errors are caught and printed so one failed backup does not stop
    later attempts.
    """
    interval_seconds = 300  # back up once every 5 minutes
    while True:
        time.sleep(interval_seconds)
        if not DATASET_ID:
            # No target repo configured; skip this cycle.
            continue
        try:
            api.upload_folder(
                folder_path=LOCAL_DIR,
                repo_id=DATASET_ID,
                repo_type="dataset",
            )
            print("备份成功至 Dataset")
        except Exception as err:
            print(f"备份失败: {err}")
if __name__ == "__main__":
    # 1. Download the data before starting anything else.
    download_data()

    # 2. Start the periodic backup thread. It is a daemon thread, so it does
    #    not keep this process alive once the web UI below has exited.
    backup_thread = threading.Thread(target=upload_data, daemon=True)
    backup_thread.start()

    # 3. Run the original web UI (note: this must match the start command in
    #    your Dockerfile). subprocess.run() BLOCKS here until the web UI
    #    process exits; the backup thread keeps running in the background
    #    in the meantime.
    completed = subprocess.run(
        ["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"]
    )

    # Propagate the web UI's exit status instead of always exiting 0, so a
    # crashed web UI is visible to whatever supervises this wrapper.
    # NOTE(review): changes made since the last 5-minute backup are not
    # uploaded on exit — consider a final upload here if that matters.
    raise SystemExit(completed.returncode)