flzta committed on
Commit
c2e15e4
·
verified ·
1 Parent(s): a338374

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +73 -69
sync_data.sh CHANGED
@@ -1,33 +1,38 @@
1
- #!/bin/sh
2
 
3
- # 设置默认的同步间隔为 600 (10 分钟),可以作为环境变量传入
4
- SYNC_INTERVAL=${SYNC_INTERVAL:-600}
5
-
6
- # 设置备份保留数量,默认为 5,可以作为环境变量传入
7
- BACKUP_RETENTION=${BACKUP_RETENTION:-5}
8
-
9
- # 检查环境变量
10
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
11
- echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
12
- exec ./cloudreve
13
  exit 0
14
  fi
15
 
16
- # 激活虚拟环境
17
- . /opt/venv/bin/activate
 
 
18
 
19
- # 上传备份
 
 
 
 
 
20
  upload_backup() {
21
- file_path="$1"
22
- file_name="$2"
 
 
23
 
24
  python3 -c "
25
  from huggingface_hub import HfApi
26
  import sys
27
  import os
28
- def manage_backups(api, repo_id, max_files=50):
 
 
29
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
30
- backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz')]
31
  backup_files.sort()
32
 
33
  if len(backup_files) >= max_files:
@@ -38,108 +43,107 @@ def manage_backups(api, repo_id, max_files=50):
38
  print(f'Deleted old backup: {file_to_delete}')
39
  except Exception as e:
40
  print(f'Error deleting {file_to_delete}: {str(e)}')
41
- api = HfApi(token='$HF_TOKEN')
 
42
  try:
43
  api.upload_file(
44
  path_or_fileobj='$file_path',
45
  path_in_repo='$file_name',
46
- repo_id='$DATASET_ID',
47
  repo_type='dataset'
48
  )
49
  print(f'Successfully uploaded $file_name')
50
 
51
- manage_backups(api, '$DATASET_ID')
52
  except Exception as e:
53
  print(f'Error uploading file: {str(e)}')
54
  "
55
  }
56
 
57
- # 下载最新备份
58
  download_latest_backup() {
 
 
 
59
  python3 -c "
60
  from huggingface_hub import HfApi
61
  import sys
62
  import os
63
  import tarfile
64
  import tempfile
65
- import shutil # 确保这一行存在
66
- api = HfApi(token='$HF_TOKEN')
 
67
  try:
68
- files = api.list_repo_files(repo_id='$DATASET_ID', repo_type='dataset')
69
- backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz')]
70
 
71
  if not backup_files:
72
- print(' HuggingFace Dataset 上没有找到备份文件')
73
- sys.exit(0)
74
 
75
  latest_backup = sorted(backup_files)[-1]
76
 
77
  with tempfile.TemporaryDirectory() as temp_dir:
78
  filepath = api.hf_hub_download(
79
- repo_id='$DATASET_ID',
80
  filename=latest_backup,
81
  repo_type='dataset',
82
  local_dir=temp_dir
83
  )
84
 
85
  if filepath and os.path.exists(filepath):
86
- print(f'找到最新的备份:{latest_backup},尝试恢复...')
87
- # 删除现有的数据目录
88
- if os.path.isdir('/opt/cloudreve/data'):
89
- print('删除现有的数据目录:/opt/cloudreve/data')
90
- shutil.rmtree('/opt/cloudreve/data')
91
- # 创建数据目录(如果被删除)
92
- os.makedirs('/opt/cloudreve/data', exist_ok=True)
93
-
 
 
94
  with tarfile.open(filepath, 'r:gz') as tar:
95
- tar.extractall('/opt/cloudreve/data')
96
- print(f'成功从 {latest_backup} 恢复备份')
97
-
98
  except Exception as e:
99
- print(' HuggingFace Dataset 下载备份时出错:{}'.format(e))
100
  "
101
  }
102
 
103
- # 首次启动时下载最新备份和后续每次启动时都尝试恢复
104
- echo "尝试从 HuggingFace Dataset 下载并恢复最新备份..."
105
  download_latest_backup
106
 
107
- # 同步函数
108
  sync_data() {
109
  while true; do
110
- echo "开始同步进程于 $(date)"
111
 
112
- if [ -d /opt/cloudreve/data ]; then
113
  timestamp=$(date +%Y%m%d_%H%M%S)
114
- backup_file="cloudreve_backup_${timestamp}.tar.gz"
115
- backup_file_path="/tmp/${backup_file}"
116
-
117
- echo "创建备份归档:${backup_file_path}"
118
- echo "在备份之前列出 /opt/cloudreve/data 的内容:"
119
- ls -al /opt/cloudreve/data
120
-
121
- # 尝试直接打包 /opt/cloudreve/data 目录,并显示详细信息
122
- tar -czvf "${backup_file_path}" /opt/cloudreve/data
123
-
124
- if [ -f "${backup_file_path}" ]; then
125
- echo "正在上传备份到 HuggingFace Dataset..."
126
- upload_backup "${backup_file_path}" "${backup_file}"
127
- rm -f "${backup_file_path}"
128
- else:
129
- echo "创建备份归档失败。"
130
- fi
131
- else:
132
- echo "数据目录尚不存在,等待下一次同步..."
133
  fi
134
 
135
- SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
136
- echo "下一次同步将在 ${SYNC_INTERVAL} 秒后进行..."
137
- sleep "$SYNC_INTERVAL"
138
  done
139
  }
140
 
141
- # 后台启动同步进程
142
  sync_data &
143
 
144
  # 启动 Cloudreve
145
- exec ./cloudreve
 
 
1
+ #!/bin/bash
2
 
3
+ # 检查 Hugging Face Token 和 Dataset ID 环境变量
 
 
 
 
 
 
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
+ echo "Starting Cloudreve without backup functionality - missing HF_TOKEN or DATASET_ID"
6
+ exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
7
  exit 0
8
  fi
9
 
10
+ # 定义 Cloudreve 数据目录和配置文件路径
11
+ DATA_DIR="/opt/cloudreve/data"
12
+ CONFIG_FILE="/opt/cloudreve/config.ini"
13
+ BACKUP_PREFIX="cloudreve_backup"
14
 
15
+ # 激活 Python 虚拟环境
16
+ if [ -f "/opt/venv/bin/activate" ]; then
17
+ source /opt/venv/bin/activate
18
+ fi
19
+
20
+ # Python 函数:上传备份
21
  upload_backup() {
22
+ local file_path="$1"
23
+ local file_name="$2"
24
+ local token="$HF_TOKEN"
25
+ local repo_id="$DATASET_ID"
26
 
27
  python3 -c "
28
  from huggingface_hub import HfApi
29
  import sys
30
  import os
31
+ import glob
32
+
33
+ def manage_backups(api, repo_id, max_files=5):
34
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
35
+ backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
36
  backup_files.sort()
37
 
38
  if len(backup_files) >= max_files:
 
43
  print(f'Deleted old backup: {file_to_delete}')
44
  except Exception as e:
45
  print(f'Error deleting {file_to_delete}: {str(e)}')
46
+
47
+ api = HfApi(token='$token')
48
  try:
49
  api.upload_file(
50
  path_or_fileobj='$file_path',
51
  path_in_repo='$file_name',
52
+ repo_id='$repo_id',
53
  repo_type='dataset'
54
  )
55
  print(f'Successfully uploaded $file_name')
56
 
57
+ manage_backups(api, '$repo_id')
58
  except Exception as e:
59
  print(f'Error uploading file: {str(e)}')
60
  "
61
  }
62
 
63
+ # Python 函数:下载最新备份
64
  download_latest_backup() {
65
+ local token="$HF_TOKEN"
66
+ local repo_id="$DATASET_ID"
67
+
68
  python3 -c "
69
  from huggingface_hub import HfApi
70
  import sys
71
  import os
72
  import tarfile
73
  import tempfile
74
+ import glob
75
+
76
+ api = HfApi(token='$token')
77
  try:
78
+ files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
79
+ backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
80
 
81
  if not backup_files:
82
+ print('No backup files found.')
83
+ sys.exit()
84
 
85
  latest_backup = sorted(backup_files)[-1]
86
 
87
  with tempfile.TemporaryDirectory() as temp_dir:
88
  filepath = api.hf_hub_download(
89
+ repo_id='$repo_id',
90
  filename=latest_backup,
91
  repo_type='dataset',
92
  local_dir=temp_dir
93
  )
94
 
95
  if filepath and os.path.exists(filepath):
96
+ # 删除现有的数据目录和配置文件
97
+ if [ -d \"$DATA_DIR\" ]; then
98
+ echo \"Deleting existing data directory: $DATA_DIR\"
99
+ rm -rf \"$DATA_DIR\"
100
+ fi
101
+ if [ -f \"$CONFIG_FILE\" ]; then
102
+ echo \"Deleting existing config file: $CONFIG_FILE\"
103
+ rm -rf \"$CONFIG_FILE\"
104
+ fi
105
+ mkdir -p \"$DATA_DIR\"
106
  with tarfile.open(filepath, 'r:gz') as tar:
107
+ tar.extractall(\"/opt/cloudreve\") # 将备份恢复到 /opt/cloudreve 目录
108
+ echo f'Successfully restored backup from {latest_backup}'
 
109
  except Exception as e:
110
+ print(f'Error downloading backup: {str(e)}')
111
  "
112
  }
113
 
114
+ # 首次启动时下载最新备份
115
+ echo "Downloading latest backup from HuggingFace..."
116
  download_latest_backup
117
 
118
+ # 后台启动同步进程
119
  sync_data() {
120
  while true; do
121
+ echo "Starting sync process at $(date)"
122
 
123
+ if [ -d "$DATA_DIR" ]; then
124
  timestamp=$(date +%Y%m%d_%H%M%S)
125
+ backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
126
+ backup_path="/tmp/${backup_file}"
127
+
128
+ echo "Compressing data directory and config file..."
129
+ tar -czf "$backup_path" -C /opt/cloudreve cloudreve config.ini
130
+
131
+ echo "Uploading backup to HuggingFace..."
132
+ upload_backup "$backup_path" "${backup_file}"
133
+
134
+ rm -f "$backup_path"
135
+ else
136
+ echo "Data directory does not exist yet, waiting for next sync..."
 
 
 
 
 
 
 
137
  fi
138
 
139
+ SYNC_INTERVAL=${SYNC_INTERVAL:-3600} # 默认同步间隔为 1 小时
140
+ echo "Next sync in ${SYNC_INTERVAL} seconds..."
141
+ sleep $SYNC_INTERVAL
142
  done
143
  }
144
 
 
145
  sync_data &
146
 
147
  # 启动 Cloudreve
148
+ echo "Starting Cloudreve..."
149
+ exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini