#!/bin/sh

# Check required environment variables
if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
    echo "HF_TOKEN or DATASET_ID not set; backup is disabled"
    exit 1
fi

# Activate the Python virtual environment
. "${APP_HOME}/venv/bin/activate"

# Generate the sync helper script
cat > /tmp/hf_sync.py << 'EOL'
# HuggingFace sync helper: backup upload/download and repo history squashing
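# This file is written via a quoted heredoc ('EOL'), so the shell expands
# nothing inside it; all runtime values arrive through sys.argv.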
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile

def manage_backups(api, repo_id, prefix, max_files=50):
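    # Backup names embed a timestamp, so lexicographic order is chronological.
    # When the count reaches max_files, the oldest archives are deleted so at
    # most max_files - 1 remain after cleanup.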
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = [f for f in files if f.startswith(prefix) and f.endswith('.tar.gz')]
    backup_files.sort()
    if len(backup_files) >= max_files:
        files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
        for file_to_delete in files_to_delete:
            try:
                api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type="dataset")
                print(f'Deleted old backup: {file_to_delete}')
            except Exception as e:
                print(f'Error deleting {file_to_delete}: {str(e)}')

def upload_backup(file_path, file_name, token, repo_id, prefix):
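    # Upload the archive as a dataset file, then prune old backups so the
    # repo does not grow without bound.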
    api = HfApi(token=token)
    try:
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset"
        )
        print(f"Successfully uploaded {file_name}")
        manage_backups(api, repo_id, prefix)
    except Exception as e:
        print(f"Error uploading file: {str(e)}")

def download_latest_backup(token, repo_id, prefix, extract_path):
    try:
        api = HfApi(token=token)
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        backup_files = [f for f in files if f.startswith(prefix) and f.endswith('.tar.gz')]
        if not backup_files:
            print(f"No backup files found (prefix: {prefix})")
            return
        latest_backup = sorted(backup_files)[-1]
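        # Download into a throwaway directory; the archive is removed together
        # with the TemporaryDirectory once extraction finishes.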
        with tempfile.TemporaryDirectory() as temp_dir:
            filepath = api.hf_hub_download(
                repo_id=repo_id,
                filename=latest_backup,
                repo_type="dataset",
                local_dir=temp_dir
            )
            if filepath and os.path.exists(filepath):
                try:
                    with tarfile.open(filepath, 'r:gz') as tar:
                        tar.extractall(extract_path)
                    print(f"Restored backup {latest_backup} to {extract_path}")
                except Exception as e:
                    print(f"Error extracting backup: {str(e)}")
    except Exception as e:
        print(f"Error downloading backup: {str(e)}")

def super_squash_history(token, repo_id):
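    # Collapse the dataset repo's commit history into a single commit, so
    # storage held by superseded backup revisions can be reclaimed.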
    try:
        api = HfApi(token=token)
        api.super_squash_history(repo_id=repo_id, repo_type="dataset")
        print("History squash complete.")
    except Exception as e:
        print(f"Error squashing history: {str(e)}")

if __name__ == "__main__":
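    # Minimal CLI: hf_sync.py <action> <token> <repo_id> [action-specific args]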
    action = sys.argv[1]
    token = sys.argv[2]
    repo_id = sys.argv[3]
    if action == "upload":
        file_path = sys.argv[4]
        file_name = sys.argv[5]
        prefix = sys.argv[6]
        upload_backup(file_path, file_name, token, repo_id, prefix)
    elif action == "download":
        prefix = sys.argv[4]
        extract_path = sys.argv[5]
        download_latest_backup(token, repo_id, prefix, extract_path)
    elif action == "super_squash":
        super_squash_history(token, repo_id)
EOL
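# Example helper invocations, useful for manual debugging:
#   python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID" config_backup_ /mcp-proxy-server
#   python /tmp/hf_sync.py super_squash "$HF_TOKEN" "$DATASET_ID"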

# On first start, restore config and tools from the latest backups.
# The archives contain top-level "config/" and "tools/" entries (see the
# tar -C flags below), so they must be extracted into the parent directories.
echo "Downloading config backup from HuggingFace..."
python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" "config_backup_" "/mcp-proxy-server"
echo "Downloading tools backup from HuggingFace..."
python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" "tools_backup_" "/"

# Periodic sync loop
sync_data() {
    while true; do
        echo "Sync run started at $(date)"

        STORAGE_PATH1="/mcp-proxy-server/config"
        STORAGE_PATH2="/tools"
        timestamp=$(date +%Y%m%d_%H%M%S)
        backup_file1="config_backup_${timestamp}.tar.gz"
        backup_file2="tools_backup_${timestamp}.tar.gz"
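        # %Y%m%d_%H%M%S timestamps make archive names unique and sortable,
        # which the retention and restore logic in hf_sync.py relies on.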

        if [ -d "${STORAGE_PATH1}" ]; then
            tar -czf "/tmp/${backup_file1}" -C "/mcp-proxy-server" "config"
            echo "Uploading config backup to HuggingFace..."
            python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file1}" "${backup_file1}" "config_backup_"
            rm -f "/tmp/${backup_file1}"
        fi

        if [ -d "${STORAGE_PATH2}" ]; then
            tar -czf "/tmp/${backup_file2}" -C "/" "tools"
            echo "Uploading tools backup to HuggingFace..."
            python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file2}" "${backup_file2}" "tools_backup_"
            rm -f "/tmp/${backup_file2}"
        fi

        # Squash commit history at most once every 7 days
        SQUASH_FLAG_FILE="/tmp/last_squash_time"
        NOW=$(date +%s)
        SEVEN_DAYS=$((7*24*60*60))
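        # The flag file lives in /tmp, so every container restart triggers a
        # squash on the first sync run after startup.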
        if [ ! -f "$SQUASH_FLAG_FILE" ]; then
            echo $NOW > "$SQUASH_FLAG_FILE"
            echo "Performing initial history squash..."
            python /tmp/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"
        else
            LAST=$(cat "$SQUASH_FLAG_FILE")
            DIFF=$((NOW - LAST))
            if [ $DIFF -ge $SEVEN_DAYS ]; then
                echo $NOW > "$SQUASH_FLAG_FILE"
                echo "More than 7 days since the last squash; squashing commit history..."
                python /tmp/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"
            else
                echo "Less than 7 days since the last squash; skipping this run."
            fi
        fi

        SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
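        # Defaults to 7200 s (2 hours); override by exporting SYNC_INTERVAL.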
        echo "Next sync in ${SYNC_INTERVAL} seconds..."
        sleep $SYNC_INTERVAL
    done
}

# Start the sync loop in the background
sync_data &

echo "Sync process started in the background (PID: $!)"
echo "Now starting the MCP Proxy main program..."

# Launch the original MCP Proxy program
echo "Preparing to start MCP Proxy on host ${HOST:-0.0.0.0}, port ${PORT:-7860}"
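# exec replaces this shell with the node process so signals reach the server
# directly; the backgrounded sync loop keeps running as its child.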

if [ -f build/sse.js ]; then
    echo "Starting: node build/sse.js"
    exec node build/sse.js
elif [ -f /app/build/sse.js ]; then
    echo "Starting: node /app/build/sse.js"
    exec node /app/build/sse.js
elif [ -f /mcp-proxy-server/build/sse.js ]; then
    echo "Starting: node /mcp-proxy-server/build/sse.js"
    exec node /mcp-proxy-server/build/sse.js
else
    echo "Error: build/sse.js not found"
    echo "Current directory: $(pwd)"
    echo "Searching for candidate Node.js files:"
    find / -name "sse.js" -type f 2>/dev/null | head -5
    
    # If nothing is found, keep the container alive for debugging
    echo "Keeping the container running for debugging..."
    tail -f /dev/null
fi