mao123qw committed on
Commit
ba0eecc
·
verified ·
1 Parent(s): 1e4d33f

Create hf_sync.py

Browse files
Files changed (1) hide show
  1. hf_sync.py +76 -0
hf_sync.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import HfApi
2
+ import sys
3
+ import os
4
+ import tarfile
5
+ import tempfile
6
+
7
def manage_backups(api, repo_id, max_files=50):
    """Prune the oldest backup archives so at most *max_files* remain.

    Backups are the files in the dataset repo whose names start with
    ``backup_`` and end with ``.tar.gz``. They are ordered by plain string
    sort, so "oldest" means "sorts first".
    NOTE(review): this presumably relies on a sortable timestamp embedded in
    the file name -- confirm against whatever produces the archives.

    Args:
        api: Object exposing ``list_repo_files`` and ``delete_file`` with the
            keyword arguments used below (an ``HfApi`` instance in this file).
        repo_id: Identifier of the dataset repository to prune.
        max_files: Maximum number of backup archives to keep.

    A failure to delete one file is printed and does not stop the remaining
    deletions. Errors from listing the repository propagate to the caller.
    """
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = sorted(
        f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')
    )

    # Delete only the surplus. The previous `>=` / `+ 1` arithmetic removed
    # one archive too many, leaving max_files - 1 backups after each run.
    excess = len(backup_files) - max_files
    if excess <= 0:
        # Guard needed: a negative slice bound would delete from the wrong end.
        return

    for file_to_delete in backup_files[:excess]:
        try:
            api.delete_file(
                path_in_repo=file_to_delete,
                repo_id=repo_id,
                repo_type="dataset",
            )
            print(f'Deleted old backup: {file_to_delete}')
        except Exception as e:
            # Best effort: report and continue with the next file.
            print(f'Error deleting {file_to_delete}: {str(e)}')
20
+
21
def upload_backup(file_path, file_name, token, repo_id):
    """Upload a backup archive to the dataset repo, then prune old backups.

    Args:
        file_path: Local path of the archive to upload.
        file_name: Path the archive is stored under inside the repository.
        token: Hugging Face access token used to build the ``HfApi`` client.
        repo_id: Identifier of the target dataset repository.

    Returns:
        True if the upload succeeded, False if it failed. A failure while
        pruning old backups is reported but does not change the result,
        because the new archive is already stored at that point.
    """
    api = HfApi(token=token)

    try:
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset",
        )
    except Exception as e:
        print(f"Error uploading file: {str(e)}")
        return False
    print(f"Successfully uploaded {file_name}")

    # Pruning is handled separately so that a problem here is not
    # misreported as an upload error.
    try:
        manage_backups(api, repo_id)
    except Exception as e:
        print(f"Error pruning old backups: {str(e)}")
    return True
35
+
36
# Download the latest backup
def download_latest_backup(token, repo_id, restore_dir='/home/app/storage/'):
    """Download the newest backup archive and extract it into *restore_dir*.

    Backups are the repo files named ``backup_*.tar.gz``; the "latest" one is
    the name that sorts last as a plain string.
    NOTE(review): this presumably relies on a sortable timestamp in the file
    name -- confirm against whatever produces the archives.

    Args:
        token: Hugging Face access token used to build the ``HfApi`` client.
        repo_id: Identifier of the dataset repository holding the backups.
        restore_dir: Directory the archive is extracted into. Defaults to the
            previously hard-coded ``/home/app/storage/``.

    Returns:
        True if a backup was extracted, False otherwise (no backups found,
        the downloaded file was missing, or an error occurred). Errors are
        printed rather than raised.
    """
    try:
        api = HfApi(token=token)
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        backup_files = [
            f for f in files
            if f.startswith('backup_') and f.endswith('.tar.gz')
        ]

        if not backup_files:
            print("No backup files found")
            return False

        latest_backup = max(backup_files)

        # The archive only needs to exist for the duration of extraction.
        with tempfile.TemporaryDirectory() as temp_dir:
            filepath = api.hf_hub_download(
                repo_id=repo_id,
                filename=latest_backup,
                repo_type="dataset",
                local_dir=temp_dir,
            )

            if not (filepath and os.path.exists(filepath)):
                print(f"Downloaded backup not found on disk: {latest_backup}")
                return False

            with tarfile.open(filepath, 'r:gz') as tar:
                if hasattr(tarfile, 'data_filter'):
                    # The 'data' filter rejects members that would land
                    # outside restore_dir (absolute paths, '..', escaping
                    # links) and refuses special files.
                    tar.extractall(restore_dir, filter='data')
                else:
                    # SECURITY: this interpreter has no extraction filters,
                    # so archive members are trusted as-is.
                    tar.extractall(restore_dir)
            print(f"Successfully restored backup from {latest_backup}")
            return True

    except Exception as e:
        print(f"Error downloading backup: {str(e)}")
        return False
64
+
65
def main(argv):
    """Run the command line given in *argv* and return a process exit code.

    Supported invocations (``argv[0]`` is the program name):

        <prog> upload <token> <repo_id> <file_path> <file_name>
        <prog> download <token> <repo_id>

    Args:
        argv: Full argument vector, normally ``sys.argv``.

    Returns:
        0 after dispatching a recognised action, 2 when the arguments are
        missing or the action is unknown (a usage message goes to stderr).
    """
    prog = argv[0] if argv else "hf_sync.py"
    usage = (
        f"Usage: {prog} upload <token> <repo_id> <file_path> <file_name>\n"
        f"       {prog} download <token> <repo_id>"
    )

    # Every action needs at least: program name, action, token, repo id.
    if len(argv) < 4:
        print(usage, file=sys.stderr)
        return 2

    action, token, repo_id = argv[1:4]

    if action == "upload":
        if len(argv) < 6:
            print(usage, file=sys.stderr)
            return 2
        file_path, file_name = argv[4], argv[5]
        upload_backup(file_path, file_name, token, repo_id)
    elif action == "download":
        download_latest_backup(token, repo_id)
    else:
        print(f"Unknown action: {action}", file=sys.stderr)
        print(usage, file=sys.stderr)
        return 2
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))