Update sync_data.sh
Browse files
- sync_data.sh +7 -16
sync_data.sh
CHANGED
|
@@ -11,7 +11,7 @@ fi
|
|
| 11 |
|
| 12 |
# 上传备份
|
| 13 |
cat > /easynode/app/hf_sync.py << 'EOL'
|
| 14 |
-
from huggingface_hub import HfApi
|
| 15 |
import sys
|
| 16 |
import os
|
| 17 |
import tarfile
|
|
@@ -32,12 +32,8 @@ def manage_backups(api, repo_id, max_files=50):
|
|
| 32 |
print(f'Error deleting {file_to_delete}: {str(e)}')
|
| 33 |
|
| 34 |
def upload_backup(file_path, file_name, token, repo_id):
|
| 35 |
-
|
| 36 |
-
login(token=token)
|
| 37 |
-
api = HfApi()
|
| 38 |
-
|
| 39 |
try:
|
| 40 |
-
# 使用HTTP-based上传方法
|
| 41 |
api.upload_file(
|
| 42 |
path_or_fileobj=file_path,
|
| 43 |
path_in_repo=file_name,
|
|
@@ -53,10 +49,7 @@ def upload_backup(file_path, file_name, token, repo_id):
|
|
| 53 |
# 下载最新备份
|
| 54 |
def download_latest_backup(token, repo_id):
|
| 55 |
try:
|
| 56 |
-
|
| 57 |
-
login(token=token)
|
| 58 |
-
api = HfApi()
|
| 59 |
-
|
| 60 |
files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
|
| 61 |
backup_files = [f for f in files if f.startswith('easynode_backup_') and f.endswith('.tar.gz')]
|
| 62 |
|
|
@@ -67,13 +60,11 @@ def download_latest_backup(token, repo_id):
|
|
| 67 |
latest_backup = sorted(backup_files)[-1]
|
| 68 |
|
| 69 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 70 |
-
# 使用HTTP-based下载方法
|
| 71 |
filepath = api.hf_hub_download(
|
| 72 |
repo_id=repo_id,
|
| 73 |
filename=latest_backup,
|
| 74 |
repo_type="dataset",
|
| 75 |
-
local_dir=temp_dir
|
| 76 |
-
use_auth_token=token
|
| 77 |
)
|
| 78 |
|
| 79 |
if filepath and os.path.exists(filepath):
|
|
@@ -99,7 +90,7 @@ EOL
|
|
| 99 |
|
| 100 |
# 首次启动时下载最新备份
|
| 101 |
echo "Downloading latest backup from HuggingFace..."
|
| 102 |
-
python
|
| 103 |
|
| 104 |
# 同步函数
|
| 105 |
sync_data() {
|
|
@@ -110,10 +101,10 @@ sync_data() {
|
|
| 110 |
timestamp=$(date +%Y%m%d_%H%M%S)
|
| 111 |
backup_file="easynode_backup_${timestamp}.tar.gz"
|
| 112 |
|
| 113 |
-
tar -czf "/tmp/${backup_file}"
|
| 114 |
|
| 115 |
echo "Uploading backup to HuggingFace..."
|
| 116 |
-
python
|
| 117 |
|
| 118 |
rm -f "/tmp/${backup_file}"
|
| 119 |
else
|
|
|
|
| 11 |
|
| 12 |
# 上传备份
|
| 13 |
cat > /easynode/app/hf_sync.py << 'EOL'
|
| 14 |
+
from huggingface_hub import HfApi
|
| 15 |
import sys
|
| 16 |
import os
|
| 17 |
import tarfile
|
|
|
|
| 32 |
print(f'Error deleting {file_to_delete}: {str(e)}')
|
| 33 |
|
| 34 |
def upload_backup(file_path, file_name, token, repo_id):
|
| 35 |
+
api = HfApi(token=token)
|
|
|
|
|
|
|
|
|
|
| 36 |
try:
|
|
|
|
| 37 |
api.upload_file(
|
| 38 |
path_or_fileobj=file_path,
|
| 39 |
path_in_repo=file_name,
|
|
|
|
| 49 |
# 下载最新备份
|
| 50 |
def download_latest_backup(token, repo_id):
|
| 51 |
try:
|
| 52 |
+
api = HfApi(token=token)
|
|
|
|
|
|
|
|
|
|
| 53 |
files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
|
| 54 |
backup_files = [f for f in files if f.startswith('easynode_backup_') and f.endswith('.tar.gz')]
|
| 55 |
|
|
|
|
| 60 |
latest_backup = sorted(backup_files)[-1]
|
| 61 |
|
| 62 |
with tempfile.TemporaryDirectory() as temp_dir:
|
|
|
|
| 63 |
filepath = api.hf_hub_download(
|
| 64 |
repo_id=repo_id,
|
| 65 |
filename=latest_backup,
|
| 66 |
repo_type="dataset",
|
| 67 |
+
local_dir=temp_dir
|
|
|
|
| 68 |
)
|
| 69 |
|
| 70 |
if filepath and os.path.exists(filepath):
|
|
|
|
| 90 |
|
| 91 |
# 首次启动时下载最新备份
|
| 92 |
echo "Downloading latest backup from HuggingFace..."
|
| 93 |
+
python hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}"
|
| 94 |
|
| 95 |
# 同步函数
|
| 96 |
sync_data() {
|
|
|
|
| 101 |
timestamp=$(date +%Y%m%d_%H%M%S)
|
| 102 |
backup_file="easynode_backup_${timestamp}.tar.gz"
|
| 103 |
|
| 104 |
+
tar -czf "/tmp/${backup_file}" db/
|
| 105 |
|
| 106 |
echo "Uploading backup to HuggingFace..."
|
| 107 |
+
python hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
|
| 108 |
|
| 109 |
rm -f "/tmp/${backup_file}"
|
| 110 |
else
|