#!/bin/bash
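
# Usage sketch (values are illustrative; adjust to your deployment):
#   export HF_TOKEN=hf_xxx                         # Hugging Face write token
#   export DATASET_ID=your-username/cloudreve-bak  # target Dataset repo
#   export SYNC_INTERVAL=300                       # optional, seconds between backups
# Without HF_TOKEN/DATASET_ID the script starts Cloudreve with no backups.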

# Check that the Hugging Face token and Dataset ID environment variables are set
if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
    echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
    # exec replaces this shell, so nothing after it would run
    exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
fi

# Activate the Python virtual environment
source /opt/venv/bin/activate

# Cloudreve installation directory and backup archive name prefix
CLOUDREVE_DIR="/opt/cloudreve"
BACKUP_PREFIX="cloudreve_backup"
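# Archives are named ${BACKUP_PREFIX}_<YYYYMMDD_HHMMSS>.tar.gz so lexical order matches age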

# Upload a backup archive to the Hugging Face Dataset (shell wrapper around inline Python)
upload_backup() {
    file_path="$1"
    file_name="$2"
    token="$HF_TOKEN"
    repo_id="$DATASET_ID"

    echo "Preparing to upload backup file: $file_path as $file_name to Dataset: $repo_id"

    python3 -c "
from huggingface_hub import HfApi
import sys
import os
print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
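# Retention: after each upload, keep at most max_files archives and delete the oldest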
def manage_backups(api, repo_id_val, max_files=5):
    print('Managing old backups...')
    files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
    backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
    backup_files.sort()  # timestamped names sort oldest-first
    if len(backup_files) > max_files:
        print(f'Found {len(backup_files)} backup files, maximum allowed is {max_files}.')
        # Drop the oldest archives so exactly max_files remain
        files_to_delete = backup_files[:len(backup_files) - max_files]
        for file_to_delete in files_to_delete:
            try:
                print(f'Deleting old backup: {file_to_delete}')
                api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id_val, repo_type='dataset')
                print(f'Successfully deleted: {file_to_delete}')
            except Exception as e:
                print(f'Error deleting {file_to_delete}: {str(e)}')
    else:
        print('Number of backup files is within the limit.')
api = HfApi(token='$token')
try:
    repo_id_val = os.environ.get('DATASET_ID')  # read the repo id from the environment
    print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
    api.upload_file(
        path_or_fileobj='$file_path',
        path_in_repo='$file_name',
        repo_id=repo_id_val,
        repo_type='dataset'
    )
    print(f'Successfully uploaded $file_name')
    manage_backups(api, repo_id_val)
except Exception as e:
    print(f'Error uploading file: {str(e)}')
"
}
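# Example call (hypothetical timestamp):
#   upload_backup "/tmp/cloudreve_backup_20240101_120000.tar.gz" "cloudreve_backup_20240101_120000.tar.gz"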

# Download the latest backup from the Hugging Face Dataset and restore it locally
download_latest_backup() {
  token="$HF_TOKEN"
  repo_id="$DATASET_ID"

  echo "Preparing to download the latest backup from Dataset: $repo_id"

  python3 -c "
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile
print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
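# Flow: list Dataset files, pick the newest archive, download it, wipe local state, extract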
api = HfApi(token='$token')
try:
    repo_id_val = os.environ.get('DATASET_ID')  # read the repo id from the environment
    print(f'Listing files in Dataset: {repo_id_val}')
    files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
    backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
    if not backup_files:
        print('No backup files found in the Dataset.')
        sys.exit()
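    # Timestamped names sort lexicographically, so the last entry is the newest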
    latest_backup = sorted(backup_files)[-1]
    print(f'Latest backup file found: {latest_backup}')
    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(
            repo_id=repo_id_val,
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )
        if filepath and os.path.exists(filepath):
            print(f'Successfully downloaded backup to temporary directory: {filepath}')
            print(\"Before restoring backup:\")
            import subprocess
            subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
            # Remove the existing Cloudreve binary, database, and config before restoring
            import shutil
            cloudreve_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve\")
            cloudreve_db_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve.db\")
            config_ini_path = os.path.join(\"$CLOUDREVE_DIR\", \"config.ini\")
            if os.path.isdir(cloudreve_path):
                print(f'Deleting: {cloudreve_path}')
                shutil.rmtree(cloudreve_path, ignore_errors=True)
            elif os.path.exists(cloudreve_path):
                print(f'Deleting: {cloudreve_path}')
                os.remove(cloudreve_path)  # cloudreve is normally the server binary, a regular file
            if os.path.exists(cloudreve_db_path):
                print(f'Deleting: {cloudreve_db_path}')
                os.remove(cloudreve_db_path)
            if os.path.exists(config_ini_path):
                print(f'Deleting: {config_ini_path}')
                os.remove(config_ini_path)
            print(\"Deletion complete.\")
            print(f'Extracting backup archive: {filepath} to $CLOUDREVE_DIR')
            with tarfile.open(filepath, 'r:gz') as tar:
                tar.extractall(\"$CLOUDREVE_DIR\")
            print(f'Successfully restored backup from {latest_backup}')
            print(\"After restoring backup:\")
            subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], check=False)
        else:
            print('Error during file download.')
except Exception as e:
    print(f'Error downloading backup: {str(e)}')
"
}

# Download the latest backup on first start
echo "Downloading latest backup from HuggingFace..."
download_latest_backup
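# If an archive was found, the local binary, database, and config now match the last backup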

# Periodic backup loop: archive the Cloudreve files and upload them to the Dataset
sync_data() {
    echo "SYNC_DATA FUNCTION IS RUNNING" # 添加了这一行
    while true; do
        echo "Starting sync process at $(date)"

        if [ -d "$CLOUDREVE_DIR" ]; then
            echo "Before compression:"
            ls -l \"$CLOUDREVE_DIR\"

            timestamp=$(date +%Y%m%d_%H%M%S)
            backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
            backup_path="/tmp/${backup_file}"

            echo "Compressing Cloudreve directory (including database and config) to: $backup_path"
            tar -czf "$backup_path" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
            echo "Compression complete."

            echo "After compression:"
            ls -l "$backup_path"

            echo "Uploading backup to HuggingFace..."
            upload_backup "$backup_path" "${backup_file}"

            rm -f "$backup_path"  # remove the local archive once it has been uploaded
        else
            echo "Cloudreve directory does not exist yet, waiting for next sync..."
        fi

        SYNC_INTERVAL=${SYNC_INTERVAL:-60}  # default sync interval: 60 seconds, override via env
        echo "Next sync in ${SYNC_INTERVAL} seconds..."
        sleep "$SYNC_INTERVAL"
    done
}

# Delay the sync loop briefly to give Cloudreve time to start
sleep 10

# Run the sync loop in the background
sync_data &

# Start Cloudreve (exec so it runs as the main process and receives signals)
echo "Starting Cloudreve..."
exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini