# chat-share / start.sh
# (web-page artifacts from the file listing, commented out so the script parses:
#  "ggcghh's picture" / "Update start.sh" / "8b1710d verified")
# NOTE(review): the shebang below should be the very first line of the file.
#!/usr/bin/env bash
set -euo pipefail
IFS=$'\n\t'
# Auto-export every subsequent variable assignment (allexport), so the
# embedded python3 processes below can read them from the environment.
set -a
# Print a timestamped message to stdout.
# Arguments: message words; joined by a single space.
log() {
  printf '[%s] %s\n' "$(date +'%F %T')" "$*"
}
# 1. init_backup
# Decide whether backup/sync is possible and resolve DATASET_ID.
# Reads:   DATASET_ID (optional override), HF_TOKEN
# Writes:  DATASET_ID (exported via allexport)
# Returns: 0 when backup should run, 1 to skip backup entirely.
init_backup(){
  # An externally supplied dataset id wins.
  if [[ -n "${DATASET_ID:-}" ]]; then
    log "📁 使用外部定义的 DATASET_ID=$DATASET_ID"
    return 0
  fi
  if [[ -z "${HF_TOKEN:-}" ]]; then
    log "⚠️ HF_TOKEN 未设置,跳过备份"
    return 1
  fi
  # Resolve the HF username via whoami. The '|| USER_ID=""' guard is
  # essential: under 'set -e' a failing command substitution in a plain
  # assignment would abort the whole script, making the graceful
  # "skip backup" branch below unreachable.
  USER_ID=$(python3 - <<'PY'
import os, sys
from huggingface_hub import HfApi
try:
    name = HfApi(token=os.getenv("HF_TOKEN")).whoami().get("name", "")
except Exception:          # bare 'except:' would also swallow SystemExit
    sys.exit(1)
print(name) if name else sys.exit(1)
PY
  ) || USER_ID=""
  if [[ -z "$USER_ID" ]]; then
    log "⚠️ 获取 USER_ID 失败,跳过备份"
    return 1
  fi
  DATASET_ID="${USER_ID}/data"
  log "✅ 设置默认 DATASET_ID=$DATASET_ID"
  return 0
}
# 2. prep_repo
# Ensure the private backup dataset repo and its "Chat-Share" branch exist.
# Reads: HF_TOKEN, DATASET_ID (from the environment, exported by allexport).
prep_repo(){
  # exist_ok=True makes both calls idempotent, removing the racy
  # list_datasets / list_repo_refs existence probes of the old version
  # and halving the number of API round-trips.
  python3 <<'PY'
import os
from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))
repo = os.environ["DATASET_ID"]
api.create_repo(repo_id=repo, repo_type="dataset", private=True, exist_ok=True)
api.create_branch(repo_id=repo, repo_type="dataset", branch="Chat-Share",
                  exist_ok=True)
PY
  log "✅ 数据集 & 分支就绪"
}
# 3. restore_latest
# Download the newest *.tar.gz snapshot from the Chat-Share branch (if any)
# and unpack it into $BACKUP_DIR. A repo without snapshots is a no-op.
restore_latest(){
  python3 <<'PY'
import os, sys, tarfile, tempfile
from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
files = api.list_repo_files(repo_id=repo, repo_type="dataset", revision=branch)
backs = sorted(f for f in files if f.endswith(".tar.gz"))
if not backs:
    sys.exit(0)  # nothing to restore yet
td = tempfile.mkdtemp()
path = api.hf_hub_download(repo_id=repo, repo_type="dataset",
                           revision=branch, filename=backs[-1], local_dir=td)
with tarfile.open(path) as t:
    try:
        # filter="data" rejects absolute paths / ".." members (path
        # traversal hardening, PEP 706).
        t.extractall(os.getenv("BACKUP_DIR"), filter="data")
    except TypeError:
        # Python builds without the 'filter' parameter (< 3.12 backports).
        t.extractall(os.getenv("BACKUP_DIR"))
PY
  log "✅ 恢复最新备份(如果有)"
}
# 4. do_backup
# Snapshot $BACKUP_DIR into a timestamped tar.gz, upload it to the
# Chat-Share branch, prune old snapshots down to $DATASET_NUM, and squash
# the branch history so the repo does not grow without bound.
do_backup(){
  local ts fname tmp
  ts=$(date +%Y%m%d_%H%M%S)
  fname="Chat-Share_${ts}.tar.gz"
  tmp=$(mktemp -d)
  tar -czf "$tmp/$fname" -C "$BACKUP_DIR" .
  # Quoted heredoc + argv: the old unquoted heredoc spliced $tmp/$fname
  # directly into the Python source, which breaks (or injects code) when
  # a path contains quotes or other special characters.
  python3 - "$tmp/$fname" "$fname" <<'PY'
import os, sys
from huggingface_hub import HfApi

local_path, repo_name = sys.argv[1], sys.argv[2]
api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
api.upload_file(path_or_fileobj=local_path,
                path_in_repo=repo_name,
                repo_id=repo, repo_type="dataset",
                revision=branch)
# Keep only the newest $DATASET_NUM snapshots.
keep = int(os.getenv("DATASET_NUM", "10"))
files = api.list_repo_files(repo_id=repo, repo_type="dataset", revision=branch)
backs = sorted(f for f in files if f.endswith(".tar.gz"))
for old in backs[:-keep]:
    api.delete_file(path_in_repo=old,
                    repo_id=repo, repo_type="dataset", revision=branch)
api.super_squash_history(repo_id=repo, repo_type="dataset", branch=branch)
PY
  rm -rf -- "$tmp"
  log "✅ 上传备份并清理临时文件"
}
# 5. sync_loop
# Run do_backup forever, pausing $SYNC_INTERVAL seconds between rounds.
# Intended to be launched in the background from main().
sync_loop(){
  while :; do
    do_backup
    log "⏳ 下次同步在 ${SYNC_INTERVAL}s 后"
    sleep "${SYNC_INTERVAL}"
  done
}
# Main flow
# Entry point: apply environment defaults, start the backup pipeline in
# the background when credentials allow, then hand the process over to
# the main application via exec.
main(){
  BACKUP_DIR="${BACKUP_DIR:-$HOME/app/data}"
  DATASET_NUM="${DATASET_NUM:-10}"
  SYNC_INTERVAL="${SYNC_INTERVAL:-36000}"
  if ! init_backup; then
    log "🚀 直接启动主应用,无备份/同步"
  else
    log "🚀 启动备份/同步流程,使用数据集:$DATASET_ID"
    prep_repo
    restore_latest
    sync_loop &  # background; survives the exec below as a child process
  fi
  exec python app.py
}
main