autoface committed on
Commit
4dad9c6
·
1 Parent(s): 2375990

Enhanced dataset upload and rebuild logic to ensure datasets exist and are private before uploading. Updated relevant API calls to create private datasets.

Browse files
Files changed (1) hide show
  1. scripts/utils/hf_persistence.py +12 -2
scripts/utils/hf_persistence.py CHANGED
@@ -68,6 +68,16 @@ class HFPersistenceManager:
68
  """
69
  try:
70
  api = self._get_api()
 
 
 
 
 
 
 
 
 
 
71
  api.upload_file(
72
  path_or_fileobj=local_path,
73
  path_in_repo=remote_path,
@@ -156,7 +166,7 @@ class HFPersistenceManager:
156
 
157
  # Recreate dataset
158
  print('🔨 Recreating dataset...')
159
- api.create_repo(repo_id=self.dataset_id, repo_type='dataset', exist_ok=True)
160
  print('✓ Dataset recreated successfully')
161
 
162
  # Restore backed up files
@@ -202,7 +212,7 @@ class HFPersistenceManager:
202
 
203
  # Recreate dataset
204
  print('🔨 Recreating dataset...')
205
- api.create_repo(repo_id=self.dataset_id, repo_type='dataset', exist_ok=True)
206
  print('✓ Dataset recreated successfully')
207
 
208
  print('🎉 Dataset recreation and LFS cleanup completed!')
 
68
  """
69
  try:
70
  api = self._get_api()
71
+
72
+ # Ensure dataset exists and is private before uploading
73
+ try:
74
+ api.repo_info(repo_id=self.dataset_id, repo_type='dataset')
75
+ print(f'✓ Dataset exists: {self.dataset_id}')
76
+ except Exception:
77
+ print(f'πŸ“ Dataset does not exist, creating private dataset: {self.dataset_id}')
78
+ api.create_repo(repo_id=self.dataset_id, repo_type='dataset', private=True)
79
+ print(f'✓ Private dataset created: {self.dataset_id}')
80
+
81
  api.upload_file(
82
  path_or_fileobj=local_path,
83
  path_in_repo=remote_path,
 
166
 
167
  # Recreate dataset
168
  print('🔨 Recreating dataset...')
169
+ api.create_repo(repo_id=self.dataset_id, repo_type='dataset', exist_ok=True, private=True)
170
  print('✓ Dataset recreated successfully')
171
 
172
  # Restore backed up files
 
212
 
213
  # Recreate dataset
214
  print('🔨 Recreating dataset...')
215
+ api.create_repo(repo_id=self.dataset_id, repo_type='dataset', exist_ok=True, private=True)
216
  print('✓ Dataset recreated successfully')
217
 
218
  print('🎉 Dataset recreation and LFS cleanup completed!')