Spaces:
Runtime error
Runtime error
File size: 4,506 Bytes
08e5d06 5fa24bd 6f33faf 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 08e5d06 5fa24bd 39941cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
#!/bin/sh
# Activate the virtual environment first so that the main application gets the
# same interpreter and packages whether or not backups are enabled.
. /app/venv/bin/activate

# Backups need both HF_TOKEN and DATASET_ID. When either one is missing the
# application is still started, just without the restore/sync steps, which is
# what the message below announces (previously the script exited with status 1
# here and the application never started).
if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
    echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
    # Keep this command in sync with the start command at the end of the script.
    exec bash install_reader.sh
fi
# Write the Python helper (hf_sync.py) used to upload and restore backups
cat > hf_sync.py << 'EOL'
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile
def manage_backups(api, repo_id, max_files=50):
    """Prune the oldest backups so that at most ``max_files`` remain.

    Only repo files named ``backup_*.tar.gz`` are considered. They are sorted
    by name and the first ones are removed, so this relies on backup names
    sorting chronologically (the shell side names them
    ``backup_YYYYMMDD_HHMMSS.tar.gz``).

    This is meant to run after a new backup has been uploaded: the new file is
    already part of the listing, so only the surplus beyond ``max_files`` is
    deleted.

    Args:
        api: Object exposing ``list_repo_files`` and ``delete_file``.
        repo_id: Identifier of the dataset repository holding the backups.
        max_files: Maximum number of backup archives to keep.

    Returns:
        list: Names of the backups that were actually deleted.
    """
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = sorted(
        f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')
    )

    excess = len(backup_files) - max_files
    if excess <= 0:
        return []

    deleted = []
    for file_to_delete in backup_files[:excess]:
        # Best effort: one failed deletion must not stop the remaining ones.
        try:
            api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type="dataset")
        except Exception as e:
            print(f'Error deleting {file_to_delete}: {str(e)}')
        else:
            print(f'Deleted old backup: {file_to_delete}')
            deleted.append(file_to_delete)
    return deleted
def upload_backup(file_path, file_name, token, repo_id):
    """Upload one backup archive to the dataset repo, then prune old backups.

    Upload and pruning are handled separately so that a pruning failure is
    not reported as a failed upload.

    Args:
        file_path: Local path of the archive to upload.
        file_name: Name the archive gets inside the repository.
        token: HuggingFace access token.
        repo_id: Identifier of the dataset repository.

    Returns:
        bool: True when the upload succeeded (even if pruning old backups
        failed afterwards), False when the upload itself failed.
    """
    api = HfApi(token=token)
    try:
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset"
        )
    except Exception as e:
        print(f"Error uploading file: {str(e)}")
        return False

    print(f"Successfully uploaded {file_name}")

    # Pruning is housekeeping only; the new backup is already safely stored.
    try:
        manage_backups(api, repo_id)
    except Exception as e:
        print(f"Error pruning old backups: {str(e)}")
    return True
# Download the latest backup
def download_latest_backup(token, repo_id, extract_path):
    """Download the newest ``backup_*.tar.gz`` and extract it.

    The newest backup is the one whose name sorts last. The archive is
    downloaded into a temporary directory that is removed afterwards, and its
    contents are extracted into ``extract_path``.

    Args:
        token: HuggingFace access token.
        repo_id: Identifier of the dataset repository holding the backups.
        extract_path: Directory the archive is extracted into.

    Returns:
        bool: True when a backup was restored, False when there was nothing
        to restore or an error occurred (errors are printed, not raised).
    """
    try:
        api = HfApi(token=token)
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        backup_files = [f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')]
        if not backup_files:
            print("No backup files found")
            return False

        latest_backup = max(backup_files)
        with tempfile.TemporaryDirectory() as temp_dir:
            filepath = api.hf_hub_download(
                repo_id=repo_id,
                filename=latest_backup,
                repo_type="dataset",
                local_dir=temp_dir
            )
            if not (filepath and os.path.exists(filepath)):
                print(f"Downloaded backup {latest_backup} was not found on disk")
                return False

            with tarfile.open(filepath, 'r:gz') as tar:
                # Use the 'data' extraction filter where the running Python
                # provides it: it rejects members that would land outside
                # extract_path (absolute paths, '..', escaping links).
                if hasattr(tarfile, 'data_filter'):
                    tar.extractall(extract_path, filter='data')
                else:
                    tar.extractall(extract_path)
        print(f"Successfully restored backup from {latest_backup}")
        return True
    except Exception as e:
        print(f"Error downloading backup: {str(e)}")
        return False
def main(argv):
    """Dispatch the command line to the upload or download action.

    Expected forms:
        hf_sync.py upload TOKEN REPO_ID FILE_PATH FILE_NAME
        hf_sync.py download TOKEN REPO_ID [EXTRACT_PATH]

    Args:
        argv: Full argument vector, including the program name at index 0.

    Returns:
        int: 0 after dispatching an action, 2 when the arguments are invalid.
    """
    # NOTE(review): the token arrives on the command line and is therefore
    # visible in the process list; consider reading it from the environment.
    usage = (
        "usage: hf_sync.py upload TOKEN REPO_ID FILE_PATH FILE_NAME\n"
        "       hf_sync.py download TOKEN REPO_ID [EXTRACT_PATH]"
    )
    if len(argv) < 4:
        print(usage, file=sys.stderr)
        return 2

    action, token, repo_id = argv[1:4]
    if action == "upload":
        if len(argv) < 6:
            print(usage, file=sys.stderr)
            return 2
        upload_backup(argv[4], argv[5], token, repo_id)
    elif action == "download":
        # Extract into the current directory unless a path is given.
        extract_path = argv[4] if len(argv) > 4 else '.'
        download_latest_backup(token, repo_id, extract_path)
    else:
        print(f"Unknown action: {action}", file=sys.stderr)
        print(usage, file=sys.stderr)
        return 2
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
EOL
# On first start, restore the latest backup from HuggingFace
# (the archive is extracted into the application directory).
printf '%s\n' "Downloading latest backup from HuggingFace..."
python hf_sync.py download "$HF_TOKEN" "$DATASET_ID" "./"
# Sync loop: periodically archive the storage directory and upload the
# archive to HuggingFace. Runs forever; intended to be started in the
# background.
sync_data() {
    # Directory to back up; adjust to where the application keeps its data.
    STORAGE_PATH="./storage"
    # Time between backups; defaults to 7200 seconds when SYNC_INTERVAL is
    # unset or empty.
    SYNC_INTERVAL=${SYNC_INTERVAL:-7200}

    while true; do
        echo "Starting sync process at $(date)"

        if [ -d "${STORAGE_PATH}" ]; then
            # Create the backup archive
            timestamp=$(date +%Y%m%d_%H%M%S)
            backup_file="backup_${timestamp}.tar.gz"
            archive_path="/tmp/${backup_file}"

            # -C keeps the parent path out of the archive, so it contains
            # only the storage directory itself.
            tar -czf "${archive_path}" -C "$(dirname "${STORAGE_PATH}")" "$(basename "${STORAGE_PATH}")"
            tar_status=$?
            if [ "${tar_status}" -ne 0 ]; then
                # Only warn: tar can exit non-zero for non-fatal conditions
                # (e.g. files changing while being read) and still produce
                # a usable archive.
                echo "Warning: tar exited with status ${tar_status} while creating ${backup_file}"
            fi

            # Upload only when an archive was actually produced.
            if [ -s "${archive_path}" ]; then
                echo "Uploading backup to HuggingFace..."
                python hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "${archive_path}" "${backup_file}"
            else
                echo "Backup archive ${archive_path} was not created, skipping upload"
            fi

            # Remove the temporary archive
            rm -f "${archive_path}"
        else
            echo "Storage directory ${STORAGE_PATH} does not exist, waiting..."
        fi

        echo "Next sync in ${SYNC_INTERVAL} seconds..."
        # If sleep rejects SYNC_INTERVAL (e.g. not a number), fall back to
        # the default pause instead of looping without any delay.
        sleep "${SYNC_INTERVAL}" || sleep 7200
    done
}
# Start the sync loop as a background job; it must be started before the
# exec below, which replaces this shell process.
sync_data &
# Start the main application (adjust to the actual path)
exec bash install_reader.sh # or replace with your own start command