bufe committed on
Commit
0481745
·
verified ·
1 Parent(s): 6a91c19

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +26 -28
sync_data.sh CHANGED
@@ -10,6 +10,23 @@ fi
10
  # 激活虚拟环境
11
  . /opt/venv/bin/activate
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # 上传备份
14
  upload_backup() {
15
  file_path="$1"
@@ -17,8 +34,6 @@ upload_backup() {
17
 
18
  python3 -c "
19
  from huggingface_hub import HfApi
20
- import sys
21
- import os
22
  api = HfApi(token='$HF_TOKEN')
23
  try:
24
  api.upload_file(
@@ -31,40 +46,32 @@ try:
31
  except Exception as e:
32
  print(f'Error uploading file: {str(e)}')
33
  "
 
 
34
  }
35
 
36
  # 下载最新备份
37
  download_latest_backup() {
38
  python3 -c "
39
  from huggingface_hub import HfApi
40
- import sys
41
- import os
42
- import tarfile
43
- import tempfile
44
  api = HfApi(token='$HF_TOKEN')
45
  try:
46
  files = api.list_repo_files(repo_id='$DATASET_ID', repo_type='dataset')
47
  backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
48
-
49
  if not backup_files:
50
  print('No backup files found')
51
  sys.exit()
52
-
53
  latest_backup = sorted(backup_files)[-1]
54
-
55
  with tempfile.TemporaryDirectory() as temp_dir:
56
  filepath = api.hf_hub_download(
57
- repo_id='$DATASET_ID',
58
- filename=latest_backup,
59
- repo_type='dataset',
60
- local_dir=temp_dir
61
  )
62
-
63
- if filepath and os.path.exists(filepath):
64
  with tarfile.open(filepath, 'r:gz') as tar:
65
  tar.extractall('/app/electerm-web/data')
66
  print(f'Successfully restored backup from {latest_backup}')
67
-
68
  except Exception as e:
69
  print(f'Error downloading backup: {str(e)}')
70
  "
@@ -74,29 +81,20 @@ except Exception as e:
74
  echo "Downloading latest backup from HuggingFace..."
75
  download_latest_backup
76
 
77
- # 同步函数
78
  sync_data() {
79
  while true; do
80
  echo "Starting sync process at $(date)"
81
-
82
  if [ -d /app/electerm-web/data ]; then
83
- timestamp=$(date +%Y%m%d_%H%M%S)
84
- backup_file="electerm_backup_${timestamp}.tar.gz"
85
-
86
- # 压缩数据目录
87
  tar -czf "/tmp/${backup_file}" -C /app/electerm-web/data .
88
-
89
  echo "Uploading backup to HuggingFace..."
90
  upload_backup "/tmp/${backup_file}" "${backup_file}"
91
-
92
  rm -f "/tmp/${backup_file}"
93
  else
94
  echo "Data directory does not exist yet, waiting for next sync..."
95
  fi
96
-
97
- SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
98
- echo "Next sync in ${SYNC_INTERVAL} seconds..."
99
- sleep $SYNC_INTERVAL
100
  done
101
  }
102
 
 
10
  # 激活虚拟环境
11
  . /opt/venv/bin/activate
12
 
13
# Backup retention: delete the oldest electerm_backup_*.tar.gz files from the
# HuggingFace dataset repo so that only the newest DATASET_N remain.
# Globals (read): HF_TOKEN   - HuggingFace API token
#                 DATASET_ID - dataset repo id holding the backups
#                 DATASET_N  - number of backups to keep; falls back to 5 when
#                              unset or not a plain digit string
# Outputs: one "Deleted old backup: ..." line per removed file, or an
#          "Error managing backups: ..." line if an API call raises.
# The values are handed to Python through the child environment instead of
# being pasted into the program text, so quotes or other special characters
# in them cannot alter the Python code and the token stays out of argv.
manage_backups() {
  HF_TOKEN="${HF_TOKEN:-}" DATASET_ID="${DATASET_ID:-}" DATASET_N="${DATASET_N:-}" \
    python3 -c '
import os
from huggingface_hub import HfApi

token = os.environ["HF_TOKEN"]
repo = os.environ["DATASET_ID"]
raw_keep = os.environ["DATASET_N"]

api = HfApi(token=token)
try:
    files = api.list_repo_files(repo_id=repo, repo_type="dataset")
    # Backup names embed a YYYYmmdd_HHMMSS stamp, so name order is age order.
    backups = sorted(
        f for f in files
        if f.startswith("electerm_backup_") and f.endswith(".tar.gz")
    )
    keep = int(raw_keep) if raw_keep.isdigit() else 5
    # Everything except the last `keep` entries is stale. A slice that ends
    # at -0 is empty, so keep == 0 prunes nothing.
    for stale in backups[:-keep]:
        api.delete_file(path_in_repo=stale, repo_id=repo, repo_type="dataset")
        print(f"Deleted old backup: {stale}")
except Exception as e:
    print(f"Error managing backups: {str(e)}")
'
}
29
+
30
  # 上传备份
31
  upload_backup() {
32
  file_path="$1"
 
34
 
35
  python3 -c "
36
  from huggingface_hub import HfApi
 
 
37
  api = HfApi(token='$HF_TOKEN')
38
  try:
39
  api.upload_file(
 
46
  except Exception as e:
47
  print(f'Error uploading file: {str(e)}')
48
  "
49
+
50
+ manage_backups
51
  }
52
 
53
  # 下载最新备份
54
  download_latest_backup() {
55
  python3 -c "
56
  from huggingface_hub import HfApi
57
+ import tarfile, tempfile, os, sys
 
 
 
58
  api = HfApi(token='$HF_TOKEN')
59
  try:
60
  files = api.list_repo_files(repo_id='$DATASET_ID', repo_type='dataset')
61
  backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
 
62
  if not backup_files:
63
  print('No backup files found')
64
  sys.exit()
 
65
  latest_backup = sorted(backup_files)[-1]
 
66
  with tempfile.TemporaryDirectory() as temp_dir:
67
  filepath = api.hf_hub_download(
68
+ repo_id='$DATASET_ID', filename=latest_backup,
69
+ repo_type='dataset', local_dir=temp_dir
 
 
70
  )
71
+ if os.path.exists(filepath):
 
72
  with tarfile.open(filepath, 'r:gz') as tar:
73
  tar.extractall('/app/electerm-web/data')
74
  print(f'Successfully restored backup from {latest_backup}')
 
75
  except Exception as e:
76
  print(f'Error downloading backup: {str(e)}')
77
  "
 
81
  echo "Downloading latest backup from HuggingFace..."
82
  download_latest_backup
83
 
84
# Data sync loop: on every pass, archive /app/electerm-web/data into /tmp,
# hand the archive to upload_backup, remove the local copy, then sleep.
# Globals (read):    SYNC_INTERVAL - seconds between passes (default 7200)
# Globals (written): backup_file   - name of the archive for the current pass
# The loop has no exit condition, so this function does not return on its own.
sync_data() {
  while true; do
    echo "Starting sync process at $(date)"

    if [ -d /app/electerm-web/data ]; then
      backup_file="electerm_backup_$(date +%Y%m%d_%H%M%S).tar.gz"

      # GNU tar exits 1 when a file changed while it was being read; the
      # archive is still written, so that status is tolerated. Any higher
      # status, or a missing/empty archive, means there is nothing
      # trustworthy to upload.
      if { tar -czf "/tmp/${backup_file}" -C /app/electerm-web/data . \
             || [ "$?" -eq 1 ]; } \
         && [ -s "/tmp/${backup_file}" ]; then
        echo "Uploading backup to HuggingFace..."
        upload_backup "/tmp/${backup_file}" "${backup_file}"
      else
        echo "Failed to create backup archive, skipping upload" >&2
      fi

      rm -f "/tmp/${backup_file}"
    else
      echo "Data directory does not exist yet, waiting for next sync..."
    fi

    # Quoted so the interval always reaches sleep as a single argument.
    sleep "${SYNC_INTERVAL:-7200}"
  done
}
100