HuggingFace0920 committed on
Commit
b3aa93e
·
verified ·
1 Parent(s): 5e4fdf4

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +53 -62
sync_data.sh CHANGED
@@ -18,10 +18,9 @@ import os
18
  import tarfile
19
  import tempfile
20
 
21
- # 管理备份文件数量,超出最大数量则自动删除最旧的备份
22
- def manage_backups(api, repo_id, max_files=50):
23
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
24
- backup_files = [f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')]
25
  backup_files.sort()
26
  if len(backup_files) >= max_files:
27
  files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
@@ -32,8 +31,7 @@ def manage_backups(api, repo_id, max_files=50):
32
  except Exception as e:
33
  print(f'删除 {file_to_delete} 时出错: {str(e)}')
34
 
35
- # 上传备份文件到 HuggingFace
36
- def upload_backup(file_path, file_name, token, repo_id):
37
  api = HfApi(token=token)
38
  try:
39
  api.upload_file(
@@ -43,19 +41,17 @@ def upload_backup(file_path, file_name, token, repo_id):
43
  repo_type="dataset"
44
  )
45
  print(f"成功上传 {file_name}")
46
- manage_backups(api, repo_id)
47
  except Exception as e:
48
  print(f"上传文件出错: {str(e)}")
49
 
50
-
51
- # 下载最新备份
52
- def download_latest_backup(token, repo_id, extract_path):
53
  try:
54
  api = HfApi(token=token)
55
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
56
- backup_files = [f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')]
57
  if not backup_files:
58
- print("未找到任何备份文件")
59
  return
60
  latest_backup = sorted(backup_files)[-1]
61
  with tempfile.TemporaryDirectory() as temp_dir:
@@ -68,17 +64,13 @@ def download_latest_backup(token, repo_id, extract_path):
68
  if filepath and os.path.exists(filepath):
69
  try:
70
  with tarfile.open(filepath, 'r:gz') as tar:
71
- tar.extractall("/mcp-proxy-server") # 或其它有权限的目录
72
- tar.extractall("/tools") # 如有需要
73
- print(f"已成功恢复备份: {latest_backup}")
74
- except PermissionError as e:
75
- print(f"解压备份时权限不足: {str(e)},请确保容器有写入根目录的权限。")
76
  except Exception as e:
77
  print(f"解压备份时出错: {str(e)}")
78
  except Exception as e:
79
  print(f"下载备份出错: {str(e)}")
80
 
81
- # 合并历史提交
82
  def super_squash_history(token, repo_id):
83
  try:
84
  api = HfApi(token=token)
@@ -87,7 +79,6 @@ def super_squash_history(token, repo_id):
87
  except Exception as e:
88
  print(f"合并历史出错: {str(e)}")
89
 
90
- # 主函数
91
  if __name__ == "__main__":
92
  action = sys.argv[1]
93
  token = sys.argv[2]
@@ -95,67 +86,67 @@ if __name__ == "__main__":
95
  if action == "upload":
96
  file_path = sys.argv[4]
97
  file_name = sys.argv[5]
98
- upload_backup(file_path, file_name, token, repo_id)
 
99
  elif action == "download":
100
- extract_path = sys.argv[4] if len(sys.argv) > 4 else '.'
101
- download_latest_backup(token, repo_id, extract_path)
 
102
  elif action == "super_squash":
103
  super_squash_history(token, repo_id)
104
  EOL
105
 
106
- # 首次启动时从 HuggingFace 下载最新备份(解压到应用目录)
107
- echo "正在从 HuggingFace 下载最新备份..."
108
- python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" "/"
 
 
109
 
110
  # 同步函数
111
  sync_data() {
112
  while true; do
113
  echo "同步进程启动于 $(date)"
114
-
115
- # 确保数据目录存在(请根据实际路径修改)
116
  STORAGE_PATH1="/mcp-proxy-server/config"
117
  STORAGE_PATH2="/tools"
118
- if [ -d "${STORAGE_PATH1}" ] || [ -d "${STORAGE_PATH2}" ]; then
119
- # 创建备份
120
- timestamp=$(date +%Y%m%d_%H%M%S)
121
- backup_file="backup_${timestamp}.tar.gz"
122
-
123
- # 压缩目录(使用-C避免包含父路径),同时包含/mcp-proxy-server/config和/tools
124
- tar -czf "/tmp/${backup_file}" \
125
- -C "/mcp-proxy-server" "config" \
126
- -C "/" "tools"
127
-
128
- # 上传到 HuggingFace
129
- echo "正在上传备份到 HuggingFace..."
130
- python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
131
-
132
- # 合并历史提交
133
- SQUASH_FLAG_FILE="/tmp/last_squash_time"
134
- NOW=$(date +%s)
135
- SEVEN_DAYS=$((7*24*60*60))
136
- if [ ! -f "$SQUASH_FLAG_FILE" ]; then
 
 
 
 
 
 
 
 
 
 
 
137
  echo $NOW > "$SQUASH_FLAG_FILE"
138
- echo "首次合并历史提交..."
139
  python /tmp/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"
140
  else
141
- LAST=$(cat "$SQUASH_FLAG_FILE")
142
- DIFF=$((NOW - LAST))
143
- if [ $DIFF -ge $SEVEN_DAYS ]; then
144
- echo $NOW > "$SQUASH_FLAG_FILE"
145
- echo "距离上次合并已超过7天,正在合并历史提交..."
146
- python /tmp/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"
147
- else
148
- echo "距离上次合并未满7天,本次跳过合并历史提交。"
149
- fi
150
  fi
151
-
152
- # 清理临时文件
153
- rm -f "/tmp/${backup_file}"
154
- else
155
- echo "存储目录 ${STORAGE_PATH1} 或 ${STORAGE_PATH2} 不存在,等待中..."
156
  fi
157
-
158
- # 同步间隔
159
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
160
  echo "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
161
  sleep $SYNC_INTERVAL
 
18
  import tarfile
19
  import tempfile
20
 
21
+ def manage_backups(api, repo_id, prefix, max_files=50):
 
22
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
23
+ backup_files = [f for f in files if f.startswith(prefix) and f.endswith('.tar.gz')]
24
  backup_files.sort()
25
  if len(backup_files) >= max_files:
26
  files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
 
31
  except Exception as e:
32
  print(f'删除 {file_to_delete} 时出错: {str(e)}')
33
 
34
+ def upload_backup(file_path, file_name, token, repo_id, prefix):
 
35
  api = HfApi(token=token)
36
  try:
37
  api.upload_file(
 
41
  repo_type="dataset"
42
  )
43
  print(f"成功上传 {file_name}")
44
+ manage_backups(api, repo_id, prefix)
45
  except Exception as e:
46
  print(f"上传文件出错: {str(e)}")
47
 
48
+ def download_latest_backup(token, repo_id, prefix, extract_path):
 
 
49
  try:
50
  api = HfApi(token=token)
51
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
52
+ backup_files = [f for f in files if f.startswith(prefix) and f.endswith('.tar.gz')]
53
  if not backup_files:
54
+ print(f"未找到任何备份文件(前缀: {prefix})")
55
  return
56
  latest_backup = sorted(backup_files)[-1]
57
  with tempfile.TemporaryDirectory() as temp_dir:
 
64
  if filepath and os.path.exists(filepath):
65
  try:
66
  with tarfile.open(filepath, 'r:gz') as tar:
67
+ tar.extractall(extract_path)
68
+ print(f"已成功恢复备份: {latest_backup} 到 {extract_path}")
 
 
 
69
  except Exception as e:
70
  print(f"解压备份时出错: {str(e)}")
71
  except Exception as e:
72
  print(f"下载备份出错: {str(e)}")
73
 
 
74
  def super_squash_history(token, repo_id):
75
  try:
76
  api = HfApi(token=token)
 
79
  except Exception as e:
80
  print(f"合并历史出错: {str(e)}")
81
 
 
82
  if __name__ == "__main__":
83
  action = sys.argv[1]
84
  token = sys.argv[2]
 
86
  if action == "upload":
87
  file_path = sys.argv[4]
88
  file_name = sys.argv[5]
89
+ prefix = sys.argv[6]
90
+ upload_backup(file_path, file_name, token, repo_id, prefix)
91
  elif action == "download":
92
+ prefix = sys.argv[4]
93
+ extract_path = sys.argv[5]
94
+ download_latest_backup(token, repo_id, prefix, extract_path)
95
  elif action == "super_squash":
96
  super_squash_history(token, repo_id)
97
  EOL
98
 
99
+ # 首次启动时分别还原 config 和 tools
100
+ echo "正在从 HuggingFace 下载 config 备份..."
101
+ python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" "config_backup_" "/mcp-proxy-server/config"
102
+ echo "正在从 HuggingFace 下载 tools 备份..."
103
+ python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" "tools_backup_" "/tools"
104
 
105
  # 同步函数
106
  sync_data() {
107
  while true; do
108
  echo "同步进程启动于 $(date)"
109
+
 
110
  STORAGE_PATH1="/mcp-proxy-server/config"
111
  STORAGE_PATH2="/tools"
112
+ timestamp=$(date +%Y%m%d_%H%M%S)
113
+ backup_file1="config_backup_${timestamp}.tar.gz"
114
+ backup_file2="tools_backup_${timestamp}.tar.gz"
115
+
116
+ if [ -d "${STORAGE_PATH1}" ]; then
117
+ tar -czf "/tmp/${backup_file1}" -C "/mcp-proxy-server" "config"
118
+ echo "正在上传 config 备份到 HuggingFace..."
119
+ python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file1}" "${backup_file1}" "config_backup_"
120
+ rm -f "/tmp/${backup_file1}"
121
+ fi
122
+
123
+ if [ -d "${STORAGE_PATH2}" ]; then
124
+ tar -czf "/tmp/${backup_file2}" -C "/" "tools"
125
+ echo "正在上传 tools 备份到 HuggingFace..."
126
+ python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file2}" "${backup_file2}" "tools_backup_"
127
+ rm -f "/tmp/${backup_file2}"
128
+ fi
129
+
130
+ # 合并历史提交
131
+ SQUASH_FLAG_FILE="/tmp/last_squash_time"
132
+ NOW=$(date +%s)
133
+ SEVEN_DAYS=$((7*24*60*60))
134
+ if [ ! -f "$SQUASH_FLAG_FILE" ]; then
135
+ echo $NOW > "$SQUASH_FLAG_FILE"
136
+ echo "首次合并历史提交..."
137
+ python /tmp/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"
138
+ else
139
+ LAST=$(cat "$SQUASH_FLAG_FILE")
140
+ DIFF=$((NOW - LAST))
141
+ if [ $DIFF -ge $SEVEN_DAYS ]; then
142
  echo $NOW > "$SQUASH_FLAG_FILE"
143
+ echo "距离上次合并已超过7天,正在合并历史提交..."
144
  python /tmp/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"
145
  else
146
+ echo "距离上次合并未满7天,本次跳过合并历史提交。"
 
 
 
 
 
 
 
 
147
  fi
 
 
 
 
 
148
  fi
149
+
 
150
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
151
  echo "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
152
  sleep $SYNC_INTERVAL