Spaces:

flzta
/

data

Paused

App Files Files Community

flzta committed on Mar 25, 2025

Commit

17a9d42

verified ·

1 Parent(s): afb63c7

Update sync_data.sh

Browse files

Files changed (1) hide show

sync_data.sh +47 -7

sync_data.sh CHANGED Viewed

@@ -21,27 +21,38 @@ upload_backup() {
     token="$HF_TOKEN"
     repo_id="$DATASET_ID"
     python3 -c "
 from huggingface_hub import HfApi
 import sys
 import os
 def manage_backups(api, repo_id, max_files=50):
     files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
     backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
     backup_files.sort()
     if len(backup_files) >= max_files:
         files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
         for file_to_delete in files_to_delete:
             try:
                 api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
-                print(f'Deleted old backup: {file_to_delete}')
             except Exception as e:
                 print(f'Error deleting {file_to_delete}: {str(e)}')
 api = HfApi(token='$token')
 try:
     api.upload_file(
         path_or_fileobj='$file_path',
         path_in_repo='$file_name',
@@ -61,22 +72,30 @@ download_latest_backup() {
   token="$HF_TOKEN"
   repo_id="$DATASET_ID"
   python3 -c "
 from huggingface_hub import HfApi
 import sys
 import os
 import tarfile
 import tempfile
 api = HfApi(token='$token')
 try:
     files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
     backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
     if not backup_files:
-        print('No backup files found')
         sys.exit()
     latest_backup = sorted(backup_files)[-1]
     with tempfile.TemporaryDirectory() as temp_dir:
         filepath = api.hf_hub_download(
@@ -87,16 +106,28 @@ try:
         )
         if filepath and os.path.exists(filepath):
             # 删除现有的 Cloudreve 目录和配置文件
             cd \"$CLOUDREVE_DIR\"
             rm -rf cloudreve
             rm -rf cloudreve.db
             rm -rf config.ini
             with tarfile.open(filepath, 'r:gz') as tar:
                 tar.extractall(\"$CLOUDREVE_DIR\")
             echo f'Successfully restored backup from {latest_backup}'
 except Exception as e:
     print(f'Error downloading backup: {str(e)}')
 "
@@ -112,18 +143,26 @@ sync_data() {
         echo "Starting sync process at $(date)"
         if [ -d "$CLOUDREVE_DIR" ]; then
             timestamp=$(date +%Y%m%d_%H%M%S)
             backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
-            # 压缩数据目录和配置文件
-            tar -czf "/tmp/${backup_file}" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
             echo "Uploading backup to HuggingFace..."
-            upload_backup "/tmp/${backup_file}" "${backup_file}"
-            rm -f "/tmp/${backup_file}"
         else
-            echo "Data directory does not exist yet, waiting for next sync..."
         fi
         SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
@@ -136,4 +175,5 @@ sync_data() {
 sync_data &
 # 启动 Halo (这里需要启动 Cloudreve)
 exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini

     token="$HF_TOKEN"
     repo_id="$DATASET_ID"
+    echo "Preparing to upload backup file: $file_path as $file_name to Dataset: $repo_id"
     python3 -c "
 from huggingface_hub import HfApi
 import sys
 import os
+print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
+print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
 def manage_backups(api, repo_id, max_files=50):
+    print('Managing old backups...')
     files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
     backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
     backup_files.sort()
     if len(backup_files) >= max_files:
+        print(f'Found {len(backup_files)} backup files, maximum allowed is {max_files}.')
         files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
         for file_to_delete in files_to_delete:
             try:
+                print(f'Deleting old backup: {file_to_delete}')
                 api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
+                print(f'Successfully deleted: {file_to_delete}')
             except Exception as e:
                 print(f'Error deleting {file_to_delete}: {str(e)}')
+    else:
+        print('Number of backup files is within the limit.')
 api = HfApi(token='$token')
 try:
+    print(f'Uploading file: $file_path to {repo_id} as $file_name')
     api.upload_file(
         path_or_fileobj='$file_path',
         path_in_repo='$file_name',
   token="$HF_TOKEN"
   repo_id="$DATASET_ID"
+  echo "Preparing to download the latest backup from Dataset: $repo_id"
   python3 -c "
 from huggingface_hub import HfApi
 import sys
 import os
 import tarfile
 import tempfile
+print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
+print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
 api = HfApi(token='$token')
 try:
+    print(f'Listing files in Dataset: {repo_id}')
     files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
     backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
     if not backup_files:
+        print('No backup files found in the Dataset.')
         sys.exit()
     latest_backup = sorted(backup_files)[-1]
+    print(f'Latest backup file found: {latest_backup}')
     with tempfile.TemporaryDirectory() as temp_dir:
         filepath = api.hf_hub_download(
         )
         if filepath and os.path.exists(filepath):
+            print(f'Successfully downloaded backup to temporary directory: {filepath}')
+            echo \"Before restoring backup:\"
+            ls -l \"$CLOUDREVE_DIR\"
             # 删除现有的 Cloudreve 目录和配置文件
             cd \"$CLOUDREVE_DIR\"
+            echo \"Deleting existing Cloudreve files...\"
             rm -rf cloudreve
             rm -rf cloudreve.db
             rm -rf config.ini
+            echo \"Deletion complete.\"
+            echo \"Extracting backup archive: $filepath to $CLOUDREVE_DIR\"
             with tarfile.open(filepath, 'r:gz') as tar:
                 tar.extractall(\"$CLOUDREVE_DIR\")
             echo f'Successfully restored backup from {latest_backup}'
+            echo \"After restoring backup:\"
+            ls -l \"$CLOUDREVE_DIR\"
+        else:
+            print('Error during file download.')
 except Exception as e:
     print(f'Error downloading backup: {str(e)}')
 "
         echo "Starting sync process at $(date)"
         if [ -d "$CLOUDREVE_DIR" ]; then
+            echo "Before compression:"
+            ls -l \"$CLOUDREVE_DIR\"
             timestamp=$(date +%Y%m%d_%H%M%S)
             backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
+            backup_path="/tmp/${backup_file}"
+            echo "Compressing Cloudreve directory (including database and config) to: $backup_path"
+            tar -czf "$backup_path" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
+            echo "Compression complete."
+            echo "After compression:"
+            ls -l "$backup_path"
             echo "Uploading backup to HuggingFace..."
+            upload_backup "$backup_path" "${backup_file}"
+            rm -f "$backup_path"
         else
+            echo "Cloudreve directory does not exist yet, waiting for next sync..."
         fi
         SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
 sync_data &
 # 启动 Halo (这里需要启动 Cloudreve)
+echo "Starting Cloudreve..."
 exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini