#!/bin/bash
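#
# sync_data.sh: periodically back up the Cloudreve data directory to a
# HuggingFace dataset repo (each archive encrypted before upload) and
# restore the newest backup when the container starts.
#
# Required environment variables:
#   HF_TOKEN       - HuggingFace access token with write access to the dataset
#   DATASET_ID     - target dataset repo, e.g. "user/cloudreve-backups"
#   ENCRYPTION_KEY - passphrase used to derive the Fernet encryption key
# Optional:
#   BACKUP_DELAY   - seconds between backups (default: 7200)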
# Check required environment variables
if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
    echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
    exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
    exit 0
fi

# Encryption key for the backups (be sure to set a long, random string)
ENCRYPTION_KEY=${ENCRYPTION_KEY:-"please-set-a-long-random-encryption-key-here"}
BACKUP_DELAY=${BACKUP_DELAY:-7200}  # Default backup interval: 2 hours
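# Example invocation with illustrative (not real) values:
#   HF_TOKEN=hf_xxx DATASET_ID=user/cloudreve-backups ENCRYPTION_KEY='a-long-random-string' \
#     BACKUP_DELAY=3600 ./sync_data.sh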
# Activate the Python virtual environment
source /opt/venv/bin/activate
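# The virtual environment is assumed to provide huggingface_hub and cryptography,
# which the embedded Python snippets below depend on.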
# Encrypt a backup archive and upload it to the HuggingFace dataset
upload_backup() {
    file_path="$1"
    file_name="$2"
    token="$HF_TOKEN"
    repo_id="$DATASET_ID"
    encryption_key="$ENCRYPTION_KEY"
    python3 -c "
from huggingface_hub import HfApi
import sys
import os
import base64
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
import io

def generate_key(password, salt=b'cloudreve_salt'):
    # Derive a 32-byte Fernet key from the passphrase with PBKDF2-HMAC-SHA256
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
    return key

def encrypt_file(file_path, key):
    # Encrypt the archive with Fernet and write it alongside the original as <name>.enc
    f = Fernet(key)
    with open(file_path, 'rb') as file:
        file_data = file.read()
    encrypted_data = f.encrypt(file_data)
    encrypted_file_path = file_path + '.enc'
    with open(encrypted_file_path, 'wb') as file:
        file.write(encrypted_data)
    return encrypted_file_path

def manage_backups(api, repo_id, max_files=2):
    # Keep only the newest max_files encrypted backups in the dataset repo
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz.enc')]
    backup_files.sort()
    if len(backup_files) > max_files:
        files_to_delete = backup_files[:(len(backup_files) - max_files)]
        for file_to_delete in files_to_delete:
            try:
                api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
                print(f'Deleted old backup: {file_to_delete}')
            except Exception as e:
                print(f'Error deleting {file_to_delete}: {str(e)}')

api = HfApi(token='$token')
try:
    # Derive the encryption key
    key = generate_key('$encryption_key')
    # Encrypt the archive
    encrypted_file_path = encrypt_file('$file_path', key)
    # Upload the encrypted archive
    api.upload_file(
        path_or_fileobj=encrypted_file_path,
        path_in_repo='$file_name.enc',
        repo_id='$repo_id',
        repo_type='dataset'
    )
    print(f'Successfully uploaded encrypted $file_name')
    # Remove the temporary encrypted file
    os.remove(encrypted_file_path)
    # Enforce the backup retention limit
    manage_backups(api, '$repo_id')
except Exception as e:
    print(f'Error uploading file: {str(e)}')
"
}
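# Example usage (hypothetical file names):
#   upload_backup "/tmp/cloudreve_backup_20240101_000000.tar.gz" "cloudreve_backup_20240101_000000.tar.gz"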
# Download and restore the most recent backup from the HuggingFace dataset
download_latest_backup() {
    token="$HF_TOKEN"
    repo_id="$DATASET_ID"
    encryption_key="$ENCRYPTION_KEY"
    python3 -c "
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile
import base64
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

def generate_key(password, salt=b'cloudreve_salt'):
    # Derive the same 32-byte Fernet key that was used for encryption
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
    return key

def decrypt_file(encrypted_file_path, key):
    # Decrypt the downloaded archive and write it without the .enc suffix
    f = Fernet(key)
    with open(encrypted_file_path, 'rb') as file:
        encrypted_data = file.read()
    decrypted_data = f.decrypt(encrypted_data)
    decrypted_file_path = encrypted_file_path[:-4]  # strip the .enc suffix
    with open(decrypted_file_path, 'wb') as file:
        file.write(decrypted_data)
    return decrypted_file_path

api = HfApi(token='$token')
try:
    files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
    backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz.enc')]
    if not backup_files:
        print('No backup files found')
        sys.exit()
    latest_backup = sorted(backup_files)[-1]
    with tempfile.TemporaryDirectory() as temp_dir:
        # Download the encrypted backup archive
        encrypted_filepath = api.hf_hub_download(
            repo_id='$repo_id',
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )
        if encrypted_filepath and os.path.exists(encrypted_filepath):
            # Derive the decryption key
            key = generate_key('$encryption_key')
            # Decrypt the archive
            decrypted_filepath = decrypt_file(encrypted_filepath, key)
            # Extract into the target directory
            with tarfile.open(decrypted_filepath, 'r:gz') as tar:
                tar.extractall('/opt/cloudreve')
            print(f'Successfully restored backup from {latest_backup}')
            # Clean up the temporary decrypted file
            os.remove(decrypted_filepath)
except Exception as e:
    print(f'Error downloading backup: {str(e)}')
"
}
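# Note: restoring extracts the decrypted archive directly into /opt/cloudreve,
# so it must happen before Cloudreve is started at the bottom of this script.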
# Restore the latest backup on first start
echo "Checking for latest backup from HuggingFace..."
download_latest_backup
# Sync loop: archive the Cloudreve directory and upload it at a fixed interval
sync_data() {
    while true; do
        echo "Starting sync process at $(date)"
        if [ -d /opt/cloudreve ]; then
            timestamp=$(date +%Y%m%d_%H%M%S)
            backup_file="cloudreve_backup_${timestamp}.tar.gz"
            # Archive the whole Cloudreve directory
            tar -czf "/tmp/${backup_file}" -C /opt/cloudreve .
            echo "Uploading backup to HuggingFace..."
            upload_backup "/tmp/${backup_file}" "${backup_file}"
            rm -f "/tmp/${backup_file}"
            echo "Backup completed. Waiting for ${BACKUP_DELAY} seconds before the next backup."
            sleep "$BACKUP_DELAY"  # Sleep only after a backup, so the first one runs immediately
        else
            echo "Cloudreve directory does not exist yet, waiting..."
            sleep 60  # Retry after 60 seconds if the Cloudreve directory does not exist yet
        fi
    done
}
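# Retention: only the two most recent encrypted archives are kept in the dataset
# (manage_backups in upload_backup runs with max_files=2).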
# Start the sync loop in the background
sync_data &
# Start Cloudreve in the foreground (replaces this shell via exec)
exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini