flzta committed on
Commit
ad99012
·
verified ·
1 Parent(s): 371394d

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +11 -9
sync_data.sh CHANGED
@@ -31,9 +31,9 @@ import os
31
  print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
32
  print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
33
 
34
- def manage_backups(api, repo_id, max_files=50):
35
  print('Managing old backups...')
36
- files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
37
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
38
  backup_files.sort()
39
 
@@ -43,7 +43,7 @@ def manage_backups(api, repo_id, max_files=50):
43
  for file_to_delete in files_to_delete:
44
  try:
45
  print(f'Deleting old backup: {file_to_delete}')
46
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
47
  print(f'Successfully deleted: {file_to_delete}')
48
  except Exception as e:
49
  print(f'Error deleting {file_to_delete}: {str(e)}')
@@ -52,16 +52,17 @@ def manage_backups(api, repo_id, max_files=50):
52
 
53
  api = HfApi(token='$token')
54
  try:
55
- print(f'Uploading file: $file_path to {repo_id} as $file_name')
 
56
  api.upload_file(
57
  path_or_fileobj='$file_path',
58
  path_in_repo='$file_name',
59
- repo_id=f'{repo_id}', # 使用 f-string 传递 repo_id
60
  repo_type='dataset'
61
  )
62
  print(f'Successfully uploaded $file_name')
63
 
64
- manage_backups(api, f'{repo_id}') # 使用 f-string 传递 repo_id
65
  except Exception as e:
66
  print(f'Error uploading file: {str(e)}')
67
  "
@@ -86,8 +87,9 @@ print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
86
 
87
  api = HfApi(token='$token')
88
  try:
89
- print(f'Listing files in Dataset: {repo_id}')
90
- files = api.list_repo_files(repo_id=f'{repo_id}', repo_type='dataset') # 使用 f-string 传递 repo_id
 
91
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
92
 
93
  if not backup_files:
@@ -99,7 +101,7 @@ try:
99
 
100
  with tempfile.TemporaryDirectory() as temp_dir:
101
  filepath = api.hf_hub_download(
102
- repo_id=f'{repo_id}', # 使用 f-string 传递 repo_id
103
  filename=latest_backup,
104
  repo_type='dataset',
105
  local_dir=temp_dir
 
31
  print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
32
  print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
33
 
34
+ def manage_backups(api, repo_id_val, max_files=50):
35
  print('Managing old backups...')
36
+ files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
37
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
38
  backup_files.sort()
39
 
 
43
  for file_to_delete in files_to_delete:
44
  try:
45
  print(f'Deleting old backup: {file_to_delete}')
46
+ api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id_val, repo_type='dataset')
47
  print(f'Successfully deleted: {file_to_delete}')
48
  except Exception as e:
49
  print(f'Error deleting {file_to_delete}: {str(e)}')
 
52
 
53
  api = HfApi(token='$token')
54
  try:
55
+ repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
56
+ print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
57
  api.upload_file(
58
  path_or_fileobj='$file_path',
59
  path_in_repo='$file_name',
60
+ repo_id=repo_id_val,
61
  repo_type='dataset'
62
  )
63
  print(f'Successfully uploaded $file_name')
64
 
65
+ manage_backups(api, repo_id_val)
66
  except Exception as e:
67
  print(f'Error uploading file: {str(e)}')
68
  "
 
87
 
88
  api = HfApi(token='$token')
89
  try:
90
+ repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
91
+ print(f'Listing files in Dataset: {repo_id_val}')
92
+ files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
93
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
94
 
95
  if not backup_files:
 
101
 
102
  with tempfile.TemporaryDirectory() as temp_dir:
103
  filepath = api.hf_hub_download(
104
+ repo_id=repo_id_val,
105
  filename=latest_backup,
106
  repo_type='dataset',
107
  local_dir=temp_dir