flzta committed on
Commit
afb63c7
·
verified ·
1 Parent(s): 73ce868

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +105 -110
sync_data.sh CHANGED
@@ -2,34 +2,31 @@
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
- echo "Starting Cloudreve without backup functionality - missing HF_TOKEN or DATASET_ID"
6
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
7
  exit 0
8
  fi
9
 
 
 
 
10
  # 定义 Cloudreve 主程序目录
11
  CLOUDREVE_DIR="/opt/cloudreve"
12
  BACKUP_PREFIX="cloudreve_backup"
13
 
14
- # 激活 Python 虚拟环境
15
- if [ -f "/opt/venv/bin/activate" ]; then
16
- source /opt/venv/bin/activate
17
- fi
18
-
19
- # Python 函数:上传备份
20
  upload_backup() {
21
- local file_path="$1"
22
- local file_name="$2"
23
- local token="$HF_TOKEN"
24
- local repo_id="$DATASET_ID"
25
 
26
  python3 -c "
27
  from huggingface_hub import HfApi
28
  import sys
29
  import os
30
- import glob
31
 
32
- def manage_backups(api, repo_id, max_files=5):
33
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
34
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
35
  backup_files.sort()
@@ -43,102 +40,100 @@ def manage_backups(api, repo_id, max_files=5):
43
  except Exception as e:
44
  print(f'Error deleting {file_to_delete}: {str(e)}')
45
 
46
- api = HfApi(token='$token')
47
- try:
48
- api.upload_file(
49
- path_or_fileobj='$file_path',
50
- path_in_repo='$file_name',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  repo_id='$repo_id',
52
- repo_type='dataset'
 
 
53
  )
54
- print(f'Successfully uploaded $file_name')
55
-
56
- manage_backups(api, '$repo_id')
57
- except Exception as e:
58
- print(f'Error uploading file: {str(e)}')
59
- "
60
- }
61
-
62
- # Python 函数:下载最新备份
63
- download_latest_backup() {
64
- local token="$HF_TOKEN"
65
- local repo_id="$DATASET_ID"
66
-
67
- python3 -c "
68
- from huggingface_hub import HfApi
69
- import sys
70
- import os
71
- import tarfile
72
- import tempfile
73
- import glob
74
-
75
- api = HfApi(token='$token')
76
- try:
77
- files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
78
- backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
79
-
80
- if not backup_files:
81
- print('No backup files found.')
82
- sys.exit()
83
-
84
- latest_backup = sorted(backup_files)[-1]
85
-
86
- with tempfile.TemporaryDirectory() as temp_dir:
87
- filepath = api.hf_hub_download(
88
- repo_id='$repo_id',
89
- filename=latest_backup,
90
- repo_type='dataset',
91
- local_dir=temp_dir
92
- )
93
-
94
- if filepath and os.path.exists(filepath):
95
- # 删除现有的 Cloudreve 目录和配置文件 (除了 data 目录,如果 data 目录在主目录下)
96
- cd \"$CLOUDREVE_DIR\"
97
- if [ -f cloudreve ] ; then rm -f cloudreve; fi
98
- if [ -f cloudreve.db ] ; then rm -f cloudreve.db; fi
99
- if [ -f config.ini ] ; then rm -f config.ini; fi
100
-
101
- with tarfile.open(filepath, 'r:gz') as tar:
102
- tar.extractall(\"$CLOUDREVE_DIR\")
103
- echo f'Successfully restored backup from {latest_backup}'
104
- except Exception as e:
105
- print(f'Error downloading backup: {str(e)}')
106
- "
107
- }
108
-
109
- # 首次启动时下载最新备份
110
- echo "Downloading latest backup from HuggingFace..."
111
- download_latest_backup
112
-
113
- # 后台启动同步进程
114
- sync_data() {
115
- while true; do
116
- echo "Starting sync process at $(date)"
117
-
118
- if [ -d "$CLOUDREVE_DIR" ]; then
119
- timestamp=$(date +%Y%m%d_%H%M%S)
120
- backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
121
- backup_path="/tmp/${backup_file}"
122
-
123
- echo "Compressing Cloudreve directory (including database and config)..."
124
- tar -czf "$backup_path" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
125
-
126
- echo "Uploading backup to HuggingFace..."
127
- upload_backup "$backup_path" "${backup_file}"
128
-
129
- rm -f "$backup_path"
130
- else
131
- echo "Cloudreve directory does not exist yet, waiting for next sync..."
132
- fi
133
-
134
- SYNC_INTERVAL=${SYNC_INTERVAL:-3600} # 默认同步间隔为 1 小时
135
- echo "Next sync in ${SYNC_INTERVAL} seconds..."
136
- sleep $SYNC_INTERVAL
137
- done
138
- }
139
-
140
- sync_data &
141
-
142
- # 启动 Cloudreve
143
- echo "Starting Cloudreve..."
144
- exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
 
# Require the Hugging Face token and target dataset ID; without them,
# run Cloudreve directly with backup/sync disabled.
if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
  exit 0  # defensive: unreachable after a successful exec
fi

# Activate the Python virtual environment (provides huggingface_hub).
# Guard the source so a missing venv prints nothing and does not abort
# the script (this guard existed previously and was dropped by mistake).
if [ -f /opt/venv/bin/activate ]; then
  # shellcheck disable=SC1091
  source /opt/venv/bin/activate
fi

# Cloudreve install directory and the filename prefix used for backups.
CLOUDREVE_DIR="/opt/cloudreve"
BACKUP_PREFIX="cloudreve_backup"
16
 
17
+ # Python 函数: 上传备份
 
 
 
 
 
18
  upload_backup() {
19
+ file_path="$1"
20
+ file_name="$2"
21
+ token="$HF_TOKEN"
22
+ repo_id="$DATASET_ID"
23
 
24
  python3 -c "
25
  from huggingface_hub import HfApi
26
  import sys
27
  import os
 
28
 
29
+ def manage_backups(api, repo_id, max_files=50):
30
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
31
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
32
  backup_files.sort()
 
40
  except Exception as e:
41
  print(f'Error deleting {file_to_delete}: {str(e)}')
42
 
43
+ api = HfApi(token='$token')
44
+ try:
45
+ api.upload_file(
46
+ path_or_fileobj='$file_path',
47
+ path_in_repo='$file_name',
48
+ repo_id='$repo_id',
49
+ repo_type='dataset'
50
+ )
51
+ print(f'Successfully uploaded $file_name')
52
+
53
+ manage_backups(api, '$repo_id')
54
+ except Exception as e:
55
+ print(f'Error uploading file: {str(e)}')
56
+ "
57
+ }
58
+
59
# Download the newest backup archive from the HF dataset and restore it
# into $CLOUDREVE_DIR. Globals read: HF_TOKEN, DATASET_ID, BACKUP_PREFIX,
# CLOUDREVE_DIR. Prints status/errors to stdout; never aborts the script.
download_latest_backup() {
  token="$HF_TOKEN"
  repo_id="$DATASET_ID"

  # NOTE: shell variables are interpolated into this Python source before
  # python3 sees it, so values must not contain single quotes.
  python3 -c "
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile

api = HfApi(token='$token')
try:
    files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
    backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]

    if not backup_files:
        print('No backup files found')
        sys.exit()

    # Timestamped names sort lexicographically, so the last one is newest.
    latest_backup = sorted(backup_files)[-1]

    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(
            repo_id='$repo_id',
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )

        if filepath and os.path.exists(filepath):
            # Remove the current binary, database and config before
            # restoring. BUG FIX: the previous version embedded shell
            # commands (cd / rm -rf / echo) inside this Python snippet,
            # which is a Python SyntaxError and broke every restore.
            for name in ('cloudreve', 'cloudreve.db', 'config.ini'):
                target = os.path.join('$CLOUDREVE_DIR', name)
                if os.path.exists(target):
                    os.remove(target)

            with tarfile.open(filepath, 'r:gz') as tar:
                tar.extractall('$CLOUDREVE_DIR')
            print(f'Successfully restored backup from {latest_backup}')

except Exception as e:
    print(f'Error downloading backup: {str(e)}')
"
}
104
+
105
# On first start, pull down and restore the most recent backup.
echo "Downloading latest backup from HuggingFace..."
download_latest_backup
108
+
109
# Periodic backup loop: archive the Cloudreve binary, database and config
# and upload the archive to the HF dataset, then sleep SYNC_INTERVAL
# seconds (default 7200). Runs forever; intended to be launched with `&`.
# Globals read: CLOUDREVE_DIR, BACKUP_PREFIX, SYNC_INTERVAL.
sync_data() {
  while true; do
    echo "Starting sync process at $(date)"

    if [ -d "$CLOUDREVE_DIR" ]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"

      # Compress binary, database and config into a single archive.
      # Only upload when tar succeeded, so a partial/failed archive is
      # never pushed to the dataset (previously the status was ignored).
      if tar -czf "/tmp/${backup_file}" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini; then
        echo "Uploading backup to HuggingFace..."
        upload_backup "/tmp/${backup_file}" "${backup_file}"
      else
        echo "Backup compression failed, skipping upload" >&2
      fi

      rm -f "/tmp/${backup_file}"
    else
      echo "Data directory does not exist yet, waiting for next sync..."
    fi

    SYNC_INTERVAL=${SYNC_INTERVAL:-7200}  # default: every 2 hours
    echo "Next sync in ${SYNC_INTERVAL} seconds..."
    sleep "$SYNC_INTERVAL"
  done
}
134
+
135
# Run the backup loop in the background.
sync_data &

# Start Cloudreve in the foreground (replaces the shell, becomes the
# container's main process). FIX: the old comment said "start Halo" —
# leftover from a template; this script starts Cloudreve.
exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini