File size: 3,411 Bytes
d7ec3af
 
f6927a4
 
 
 
 
d7ec3af
 
f6927a4
4fcdb0e
d7ec3af
 
f6927a4
 
 
 
 
 
 
d7ec3af
f6927a4
 
 
 
 
d7ec3af
f6927a4
 
 
d7ec3af
f6927a4
 
 
 
 
 
 
 
d7ec3af
 
 
 
f6927a4
d7ec3af
f6927a4
4fcdb0e
d7ec3af
 
 
f6927a4
d7ec3af
4fcdb0e
f6927a4
 
d7ec3af
 
f6927a4
 
 
 
 
 
 
d7ec3af
f6927a4
 
 
 
d7ec3af
f6927a4
 
d7ec3af
f6927a4
 
 
 
 
 
 
 
 
d7ec3af
 
 
f6927a4
d7ec3af
f6927a4
d7ec3af
 
 
f6927a4
d7ec3af
 
 
 
4fcdb0e
f6927a4
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/bin/sh

# Print a timestamped log line to stdout.
# Arguments: $1 - message text, printed verbatim.
# Output:    "[YYYY-MM-DD HH:MM:SS] <message>" followed by a newline.
log() {
    # printf '%s' emits the message exactly as given. With echo, some /bin/sh
    # implementations interpret backslash sequences (or option-like text)
    # in the message.
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}

# Validate required environment variables.
# HF_TOKEN and DATASET_ID must both be non-empty; otherwise an error is logged
# and the script exits with status 1.
if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
    log "错误: 未检测到 HF_TOKEN 或 DATASET_ID,备份功能不可用"
    exit 1
fi

# Activate the Python virtual environment when $HOME/venv/bin/activate exists;
# otherwise continue with whatever `python` is found on PATH.
if [ -f "$HOME/venv/bin/activate" ]; then
    log "激活虚拟环境..."
    # Quoted so that a HOME containing whitespace or glob characters is
    # sourced as a single path, matching the quoted -f test above.
    . "$HOME/venv/bin/activate"
else
    log "警告: 未找到虚拟环境,将使用系统Python"
fi

# Tunable settings: a non-empty value already present in the environment wins,
# otherwise the default on the right is assigned.
: "${SYNC_INTERVAL:=7200}"      # default: 2 hours
: "${MAX_BACKUPS:=50}"          # default: keep 50 backups
: "${MAX_BACKUP_SIZE:=100}"     # default: backup size limit of 100 MB
# Directory that holds the application data.
STORAGE_PATH="${HOME}/app/data"

# Write the Python sync helper to /app/hf_sync.py.
# The quoted delimiter ('EOL') makes the heredoc literal: the shell performs
# no expansion on anything between the two markers.
# NOTE(review): the heredoc body below is a single placeholder line standing
# in for the hf_sync.py source, not the Python code itself — the real content
# must be restored before the python calls elsewhere in this script can work.
# NOTE(review): this writes under /app while STORAGE_PATH is built from
# $HOME/app — confirm both refer to the same location in the target image.
cat > /app/hf_sync.py << 'EOL'
... (此处为之前的hf_sync.py内容,保持不变)
EOL

# On startup, run the "download" subcommand of hf_sync.py, passing the token,
# the dataset id and $HOME/app. A failure is only logged; the script carries
# on either way.
log "正在从 HuggingFace 下载最新备份..."
if ! python /app/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" "${HOME}/app"; then
    log "备份恢复失败,将继续启动应用"
else
    log "备份恢复成功"
fi

# Periodic backup loop and its helpers.
#
# Globals read: STORAGE_PATH, SYNC_INTERVAL, MAX_BACKUP_SIZE, HF_TOKEN,
#               DATASET_ID, SQUASH_FLAG_FILE (optional override).
# Depends on:   log(), and /app/hf_sync.py being invoked with the "upload"
#               and "super_squash" subcommands exactly as shown below.
# Note: plain /bin/sh has no portable `local`, so helper variables are global.

# Archive STORAGE_PATH into /tmp and pass the archive to hf_sync.py for upload.
# The temporary archive is removed before returning.
# Returns: 1 when no archive could be produced, 0 otherwise.
_sync_backup_once() {
    timestamp=$(date +%Y%m%d_%H%M%S)
    backup_file="backup_${timestamp}.tar.gz"
    temp_backup="/tmp/${backup_file}"

    # -C switches to the parent directory first, so the archive contains only
    # the last path component instead of the full parent path.
    log "正在创建备份..."
    tar_status=0
    tar -czf "${temp_backup}" -C "$(dirname "${STORAGE_PATH}")" "$(basename "${STORAGE_PATH}")" || tar_status=$?

    if [ ! -s "${temp_backup}" ]; then
        # Nothing usable was written: skip the upload instead of handing a
        # missing or empty file to the uploader.
        log "备份创建失败 (tar 退出码 ${tar_status}),跳过本次上传"
        rm -f "${temp_backup}"
        return 1
    fi
    if [ "${tar_status}" -ne 0 ]; then
        # An archive exists but tar reported a problem (for example a file
        # that changed while being read). Upload anyway, but say so.
        log "警告: tar 退出码为 ${tar_status},备份可能不完整"
    fi

    log "正在上传备份到 HuggingFace..."
    if python /app/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "${temp_backup}" "${backup_file}" "${MAX_BACKUP_SIZE}"; then
        log "备份上传成功"
    else
        log "备份上传失败"
    fi

    # Remove the temporary archive.
    rm -f "${temp_backup}"
}

# Run the "super_squash" subcommand when no squash has been recorded yet or
# when at least seven days have passed since the recorded one. The time of the
# last attempt is stored as epoch seconds in SQUASH_FLAG_FILE.
_sync_squash_if_due() {
    SQUASH_FLAG_FILE="${SQUASH_FLAG_FILE:-/tmp/last_squash_time}"
    NOW=$(date +%s)
    SEVEN_DAYS=$((7*24*60*60))

    if [ ! -f "${SQUASH_FLAG_FILE}" ]; then
        printf '%s\n' "${NOW}" > "${SQUASH_FLAG_FILE}"
        log "首次合并历史提交..."
        python /app/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"
        return
    fi

    LAST=$(cat "${SQUASH_FLAG_FILE}")
    # An empty, non-numeric or leading-zero value would make the arithmetic
    # below fail and take the whole loop down; treat it as "never squashed".
    case "${LAST}" in
        ''|*[!0-9]*|0?*) LAST=0 ;;
    esac

    DIFF=$((NOW - LAST))
    if [ "${DIFF}" -ge "${SEVEN_DAYS}" ]; then
        printf '%s\n' "${NOW}" > "${SQUASH_FLAG_FILE}"
        log "距离上次合并已超过7天,正在合并历史提交..."
        python /app/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"
    else
        log "距离上次合并未满7天,本次跳过合并历史提交。"
    fi
}

# Entry point: loop forever, backing up STORAGE_PATH every SYNC_INTERVAL.
# When STORAGE_PATH does not exist yet, the cycle is skipped and retried after
# the same interval.
sync_data() {
    while true; do
        log "同步进程启动"

        if [ -d "${STORAGE_PATH}" ]; then
            _sync_backup_once
            _sync_squash_if_due
        else
            log "存储目录 ${STORAGE_PATH} 不存在,等待中..."
        fi

        # Wait for the next cycle.
        log "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
        sleep "${SYNC_INTERVAL}"
    done
}

# Launch the sync loop as a background job.
log "启动数据同步后台进程..."
sync_data &
sync_pid=$!

# Persist the background job's PID to /tmp/sync_process.pid and report it.
printf '%s\n' "${sync_pid}" > /tmp/sync_process.pid
log "同步进程已启动,PID: $(cat /tmp/sync_process.pid)"