mao123qw committed on
Commit
5fa24bd
·
verified ·
1 Parent(s): 3563c5d

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +18 -14
sync_data.sh CHANGED
@@ -6,8 +6,11 @@ if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
6
  exit 1
7
  fi
8
 
 
 
 
9
  # 上传备份
10
- cat > /home/app/hf_sync.py << 'EOL'
11
  from huggingface_hub import HfApi
12
  import sys
13
  import os
@@ -44,7 +47,7 @@ def upload_backup(file_path, file_name, token, repo_id):
44
  print(f"Error uploading file: {str(e)}")
45
 
46
  # 下载最新备份
47
- def download_latest_backup(token, repo_id):
48
  try:
49
  api = HfApi(token=token)
50
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
@@ -66,7 +69,7 @@ def download_latest_backup(token, repo_id):
66
 
67
  if filepath and os.path.exists(filepath):
68
  with tarfile.open(filepath, 'r:gz') as tar:
69
- tar.extractall('/home/app/storage/')
70
  print(f"Successfully restored backup from {latest_backup}")
71
 
72
  except Exception as e:
@@ -82,27 +85,28 @@ if __name__ == "__main__":
82
  file_name = sys.argv[5]
83
  upload_backup(file_path, file_name, token, repo_id)
84
  elif action == "download":
85
- download_latest_backup(token, repo_id)
 
86
  EOL
87
 
88
- # 首次启动时从HuggingFace下载最新备份
89
  echo "Downloading latest backup from HuggingFace..."
90
- python3 hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}"
91
 
92
  # 同步函数
93
  sync_data() {
94
  while true; do
95
  echo "Starting sync process at $(date)"
96
 
97
- # 确保数据目录存在
98
- if [ -d "/home/app/storage" ]; then
 
99
  # 创建备份
100
- cd /home/app/
101
  timestamp=$(date +%Y%m%d_%H%M%S)
102
  backup_file="backup_${timestamp}.tar.gz"
103
 
104
- # 压缩数据目录
105
- tar -czf "/tmp/${backup_file}" storage/
106
 
107
  # 上传到HuggingFace
108
  echo "Uploading backup to HuggingFace..."
@@ -111,7 +115,7 @@ sync_data() {
111
  # 清理临时文件
112
  rm -f "/tmp/${backup_file}"
113
  else
114
- echo "Data directory does not exist yet, waiting for next sync..."
115
  fi
116
 
117
  # 同步间隔
@@ -124,5 +128,5 @@ sync_data() {
124
  # 启动同步进程
125
  sync_data &
126
 
127
- # 启动主应用
128
- exec bash install_reader.sh
 
6
  exit 1
7
  fi
8
 
9
+ # 激活虚拟环境
10
+ . /home/app/venv/bin/activate
11
+
12
  # 上传备份
13
+ cat > hf_sync.py << 'EOL'
14
  from huggingface_hub import HfApi
15
  import sys
16
  import os
 
47
  print(f"Error uploading file: {str(e)}")
48
 
49
  # 下载最新备份
50
+ def download_latest_backup(token, repo_id, extract_path):
51
  try:
52
  api = HfApi(token=token)
53
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
 
69
 
70
  if filepath and os.path.exists(filepath):
71
  with tarfile.open(filepath, 'r:gz') as tar:
72
+ tar.extractall(extract_path) # 解压到指定路径
73
  print(f"Successfully restored backup from {latest_backup}")
74
 
75
  except Exception as e:
 
85
  file_name = sys.argv[5]
86
  upload_backup(file_path, file_name, token, repo_id)
87
  elif action == "download":
88
+ extract_path = sys.argv[4] if len(sys.argv) > 4 else '.' # 默认为当前目录
89
+ download_latest_backup(token, repo_id, extract_path)
90
  EOL
91
 
92
+ # 首次启动时从HuggingFace下载最新备份(解压到应用目录)
93
  echo "Downloading latest backup from HuggingFace..."
94
+ python hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" "./"
95
 
96
  # 同步函数
97
  sync_data() {
98
  while true; do
99
  echo "Starting sync process at $(date)"
100
 
101
+ # 确保数据目录存在(选择你的实际路径)
102
+ STORAGE_PATH="./storage" # 或改为"./storage"
103
+ if [ -d "${STORAGE_PATH}" ]; then
104
  # 创建备份
 
105
  timestamp=$(date +%Y%m%d_%H%M%S)
106
  backup_file="backup_${timestamp}.tar.gz"
107
 
108
+ # 压缩目录(使用-C避免包含父路径)
109
+ tar -czf "/tmp/${backup_file}" -C "$(dirname "${STORAGE_PATH}")" "$(basename "${STORAGE_PATH}")"
110
 
111
  # 上传到HuggingFace
112
  echo "Uploading backup to HuggingFace..."
 
115
  # 清理临时文件
116
  rm -f "/tmp/${backup_file}"
117
  else
118
+ echo "Storage directory ${STORAGE_PATH} does not exist, waiting..."
119
  fi
120
 
121
  # 同步间隔
 
128
  # 启动同步进程
129
  sync_data &
130
 
131
+ # 启动主应用(根据实际路径调整)
132
+ exec node /app/server.js # 或改为你的启动命令