Update sync_data.sh
Browse files- sync_data.sh +11 -9
sync_data.sh
CHANGED
|
@@ -31,9 +31,9 @@ import os
|
|
| 31 |
print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
|
| 32 |
print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
|
| 33 |
|
| 34 |
-
def manage_backups(api, repo_id, max_files=50):
|
| 35 |
print('Managing old backups...')
|
| 36 |
-
files = api.list_repo_files(repo_id=
|
| 37 |
backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
|
| 38 |
backup_files.sort()
|
| 39 |
|
|
@@ -43,7 +43,7 @@ def manage_backups(api, repo_id, max_files=50):
|
|
| 43 |
for file_to_delete in files_to_delete:
|
| 44 |
try:
|
| 45 |
print(f'Deleting old backup: {file_to_delete}')
|
| 46 |
-
api.delete_file(path_in_repo=file_to_delete, repo_id=
|
| 47 |
print(f'Successfully deleted: {file_to_delete}')
|
| 48 |
except Exception as e:
|
| 49 |
print(f'Error deleting {file_to_delete}: {str(e)}')
|
|
@@ -52,16 +52,17 @@ def manage_backups(api, repo_id, max_files=50):
|
|
| 52 |
|
| 53 |
api = HfApi(token='$token')
|
| 54 |
try:
|
| 55 |
-
|
|
|
|
| 56 |
api.upload_file(
|
| 57 |
path_or_fileobj='$file_path',
|
| 58 |
path_in_repo='$file_name',
|
| 59 |
-
repo_id=
|
| 60 |
repo_type='dataset'
|
| 61 |
)
|
| 62 |
print(f'Successfully uploaded $file_name')
|
| 63 |
|
| 64 |
-
manage_backups(api,
|
| 65 |
except Exception as e:
|
| 66 |
print(f'Error uploading file: {str(e)}')
|
| 67 |
"
|
|
@@ -86,8 +87,9 @@ print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
|
|
| 86 |
|
| 87 |
api = HfApi(token='$token')
|
| 88 |
try:
|
| 89 |
-
|
| 90 |
-
files
|
|
|
|
| 91 |
backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
|
| 92 |
|
| 93 |
if not backup_files:
|
|
@@ -99,7 +101,7 @@ try:
|
|
| 99 |
|
| 100 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 101 |
filepath = api.hf_hub_download(
|
| 102 |
-
repo_id=
|
| 103 |
filename=latest_backup,
|
| 104 |
repo_type='dataset',
|
| 105 |
local_dir=temp_dir
|
|
|
|
| 31 |
print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
|
| 32 |
print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
|
| 33 |
|
| 34 |
+
def manage_backups(api, repo_id_val, max_files=50):
|
| 35 |
print('Managing old backups...')
|
| 36 |
+
files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
|
| 37 |
backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
|
| 38 |
backup_files.sort()
|
| 39 |
|
|
|
|
| 43 |
for file_to_delete in files_to_delete:
|
| 44 |
try:
|
| 45 |
print(f'Deleting old backup: {file_to_delete}')
|
| 46 |
+
api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id_val, repo_type='dataset')
|
| 47 |
print(f'Successfully deleted: {file_to_delete}')
|
| 48 |
except Exception as e:
|
| 49 |
print(f'Error deleting {file_to_delete}: {str(e)}')
|
|
|
|
| 52 |
|
| 53 |
api = HfApi(token='$token')
|
| 54 |
try:
|
| 55 |
+
repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
|
| 56 |
+
print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
|
| 57 |
api.upload_file(
|
| 58 |
path_or_fileobj='$file_path',
|
| 59 |
path_in_repo='$file_name',
|
| 60 |
+
repo_id=repo_id_val,
|
| 61 |
repo_type='dataset'
|
| 62 |
)
|
| 63 |
print(f'Successfully uploaded $file_name')
|
| 64 |
|
| 65 |
+
manage_backups(api, repo_id_val)
|
| 66 |
except Exception as e:
|
| 67 |
print(f'Error uploading file: {str(e)}')
|
| 68 |
"
|
|
|
|
| 87 |
|
| 88 |
api = HfApi(token='$token')
|
| 89 |
try:
|
| 90 |
+
repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
|
| 91 |
+
print(f'Listing files in Dataset: {repo_id_val}')
|
| 92 |
+
files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
|
| 93 |
backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
|
| 94 |
|
| 95 |
if not backup_files:
|
|
|
|
| 101 |
|
| 102 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 103 |
filepath = api.hf_hub_download(
|
| 104 |
+
repo_id=repo_id_val,
|
| 105 |
filename=latest_backup,
|
| 106 |
repo_type='dataset',
|
| 107 |
local_dir=temp_dir
|