flzta committed on
Commit
afb63c7
·
verified ·
1 Parent(s): 73ce868

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +105 -110
sync_data.sh CHANGED
@@ -2,34 +2,31 @@
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
- echo "Starting Cloudreve without backup functionality - missing HF_TOKEN or DATASET_ID"
6
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
7
  exit 0
8
  fi
9
 
 
 
 
10
  # 定义 Cloudreve 主程序目录
11
  CLOUDREVE_DIR="/opt/cloudreve"
12
  BACKUP_PREFIX="cloudreve_backup"
13
 
14
- # 激活 Python 虚拟环境
15
- if [ -f "/opt/venv/bin/activate" ]; then
16
- source /opt/venv/bin/activate
17
- fi
18
-
19
- # Python 函数:上传备份
20
  upload_backup() {
21
- local file_path="$1"
22
- local file_name="$2"
23
- local token="$HF_TOKEN"
24
- local repo_id="$DATASET_ID"
25
 
26
  python3 -c "
27
  from huggingface_hub import HfApi
28
  import sys
29
  import os
30
- import glob
31
 
32
- def manage_backups(api, repo_id, max_files=5):
33
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
34
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
35
  backup_files.sort()
@@ -43,102 +40,100 @@ def manage_backups(api, repo_id, max_files=5):
43
  except Exception as e:
44
  print(f'Error deleting {file_to_delete}: {str(e)}')
45
 
46
- api = HfApi(token='$token')
47
- try:
48
- api.upload_file(
49
- path_or_fileobj='$file_path',
50
- path_in_repo='$file_name',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  repo_id='$repo_id',
52
- repo_type='dataset'
 
 
53
  )
54
- print(f'Successfully uploaded $file_name')
55
-
56
- manage_backups(api, '$repo_id')
57
- except Exception as e:
58
- print(f'Error uploading file: {str(e)}')
59
- "
60
- }
61
-
62
- # Python 函数:下载最新备份
63
- download_latest_backup() {
64
- local token="$HF_TOKEN"
65
- local repo_id="$DATASET_ID"
66
-
67
- python3 -c "
68
- from huggingface_hub import HfApi
69
- import sys
70
- import os
71
- import tarfile
72
- import tempfile
73
- import glob
74
-
75
- api = HfApi(token='$token')
76
- try:
77
- files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
78
- backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
79
-
80
- if not backup_files:
81
- print('No backup files found.')
82
- sys.exit()
83
-
84
- latest_backup = sorted(backup_files)[-1]
85
-
86
- with tempfile.TemporaryDirectory() as temp_dir:
87
- filepath = api.hf_hub_download(
88
- repo_id='$repo_id',
89
- filename=latest_backup,
90
- repo_type='dataset',
91
- local_dir=temp_dir
92
- )
93
-
94
- if filepath and os.path.exists(filepath):
95
- # 删除现有的 Cloudreve 目录和配置文件 (除了 data 目录,如果 data 目录在主目录下)
96
- cd \"$CLOUDREVE_DIR\"
97
- if [ -f cloudreve ] ; then rm -f cloudreve; fi
98
- if [ -f cloudreve.db ] ; then rm -f cloudreve.db; fi
99
- if [ -f config.ini ] ; then rm -f config.ini; fi
100
-
101
- with tarfile.open(filepath, 'r:gz') as tar:
102
- tar.extractall(\"$CLOUDREVE_DIR\")
103
- echo f'Successfully restored backup from {latest_backup}'
104
- except Exception as e:
105
- print(f'Error downloading backup: {str(e)}')
106
- "
107
- }
108
-
109
- # 首次启动时下载最新备份
110
- echo "Downloading latest backup from HuggingFace..."
111
- download_latest_backup
112
-
113
- # 后台启动同步进程
114
- sync_data() {
115
- while true; do
116
- echo "Starting sync process at $(date)"
117
-
118
- if [ -d "$CLOUDREVE_DIR" ]; then
119
- timestamp=$(date +%Y%m%d_%H%M%S)
120
- backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
121
- backup_path="/tmp/${backup_file}"
122
-
123
- echo "Compressing Cloudreve directory (including database and config)..."
124
- tar -czf "$backup_path" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
125
-
126
- echo "Uploading backup to HuggingFace..."
127
- upload_backup "$backup_path" "${backup_file}"
128
-
129
- rm -f "$backup_path"
130
- else
131
- echo "Cloudreve directory does not exist yet, waiting for next sync..."
132
- fi
133
-
134
- SYNC_INTERVAL=${SYNC_INTERVAL:-3600} # 默认同步间隔为 1 小时
135
- echo "Next sync in ${SYNC_INTERVAL} seconds..."
136
- sleep $SYNC_INTERVAL
137
- done
138
- }
139
-
140
- sync_data &
141
-
142
- # 启动 Cloudreve
143
- echo "Starting Cloudreve..."
144
- exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
 
# Require the Hugging Face token and target dataset ID; without them,
# run Cloudreve directly with backup/sync disabled.
if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
  exit 0  # defensive: unreachable after a successful exec
fi

# Activate the Python virtual environment (provides huggingface_hub).
# Guard the source so a missing venv prints nothing and does not abort
# the script (this guard existed previously and was dropped by mistake).
if [ -f /opt/venv/bin/activate ]; then
  # shellcheck disable=SC1091
  source /opt/venv/bin/activate
fi

# Cloudreve install directory and the filename prefix used for backups.
CLOUDREVE_DIR="/opt/cloudreve"
BACKUP_PREFIX="cloudreve_backup"
16
 
17
+ # Python 函数: 上传备份
 
 
 
 
 
18
  upload_backup() {
19
+ file_path="$1"
20
+ file_name="$2"
21
+ token="$HF_TOKEN"
22
+ repo_id="$DATASET_ID"
23
 
24
  python3 -c "
25
  from huggingface_hub import HfApi
26
  import sys
27
  import os
 
28
 
29
+ def manage_backups(api, repo_id, max_files=50):
30
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
31
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
32
  backup_files.sort()
 
40
  except Exception as e:
41
  print(f'Error deleting {file_to_delete}: {str(e)}')
42
 
43
+ api = HfApi(token='$token')
44
+ try:
45
+ api.upload_file(
46
+ path_or_fileobj='$file_path',
47
+ path_in_repo='$file_name',
48
+ repo_id='$repo_id',
49
+ repo_type='dataset'
50
+ )
51
+ print(f'Successfully uploaded $file_name')
52
+
53
+ manage_backups(api, '$repo_id')
54
+ except Exception as e:
55
+ print(f'Error uploading file: {str(e)}')
56
+ "
57
+ }
58
+
59
# Download the newest backup archive from the HF dataset and restore it
# into $CLOUDREVE_DIR. Globals read: HF_TOKEN, DATASET_ID, BACKUP_PREFIX,
# CLOUDREVE_DIR. Prints status/errors to stdout; never aborts the script.
download_latest_backup() {
  token="$HF_TOKEN"
  repo_id="$DATASET_ID"

  # NOTE: shell variables are interpolated into this Python source before
  # python3 sees it, so values must not contain single quotes.
  python3 -c "
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile

api = HfApi(token='$token')
try:
    files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
    backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]

    if not backup_files:
        print('No backup files found')
        sys.exit()

    # Timestamped names sort lexicographically, so the last one is newest.
    latest_backup = sorted(backup_files)[-1]

    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(
            repo_id='$repo_id',
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )

        if filepath and os.path.exists(filepath):
            # Remove the current binary, database and config before
            # restoring. BUG FIX: the previous version embedded shell
            # commands (cd / rm -rf / echo) inside this Python snippet,
            # which is a Python SyntaxError and broke every restore.
            for name in ('cloudreve', 'cloudreve.db', 'config.ini'):
                target = os.path.join('$CLOUDREVE_DIR', name)
                if os.path.exists(target):
                    os.remove(target)

            with tarfile.open(filepath, 'r:gz') as tar:
                tar.extractall('$CLOUDREVE_DIR')
            print(f'Successfully restored backup from {latest_backup}')

except Exception as e:
    print(f'Error downloading backup: {str(e)}')
"
}
104
+
105
# On first start, pull down and restore the most recent backup.
echo "Downloading latest backup from HuggingFace..."
download_latest_backup
108
+
109
# Periodic backup loop: archive the Cloudreve binary, database and config
# and upload the archive to the HF dataset, then sleep SYNC_INTERVAL
# seconds (default 7200). Runs forever; intended to be launched with `&`.
# Globals read: CLOUDREVE_DIR, BACKUP_PREFIX, SYNC_INTERVAL.
sync_data() {
  while true; do
    echo "Starting sync process at $(date)"

    if [ -d "$CLOUDREVE_DIR" ]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"

      # Compress binary, database and config into a single archive.
      # Only upload when tar succeeded, so a partial/failed archive is
      # never pushed to the dataset (previously the status was ignored).
      if tar -czf "/tmp/${backup_file}" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini; then
        echo "Uploading backup to HuggingFace..."
        upload_backup "/tmp/${backup_file}" "${backup_file}"
      else
        echo "Backup compression failed, skipping upload" >&2
      fi

      rm -f "/tmp/${backup_file}"
    else
      echo "Data directory does not exist yet, waiting for next sync..."
    fi

    SYNC_INTERVAL=${SYNC_INTERVAL:-7200}  # default: every 2 hours
    echo "Next sync in ${SYNC_INTERVAL} seconds..."
    sleep "$SYNC_INTERVAL"
  done
}
134
+
135
# Run the backup loop in the background.
sync_data &

# Start Cloudreve in the foreground (replaces the shell, becomes the
# container's main process). FIX: the old comment said "start Halo" —
# leftover from a template; this script starts Cloudreve.
exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini