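Enhanced the dataset upload and rebuild logic: before uploading a file, the upload path now checks that the dataset exists and creates it as a private dataset if it does not. The `create_repo` calls in the two rebuild paths now also pass `private=True`, so the datasets they create are private.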
Browse files
scripts/utils/hf_persistence.py
CHANGED
|
@@ -68,6 +68,16 @@ class HFPersistenceManager:
|
|
| 68 |
"""
|
| 69 |
try:
|
| 70 |
api = self._get_api()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
api.upload_file(
|
| 72 |
path_or_fileobj=local_path,
|
| 73 |
path_in_repo=remote_path,
|
|
@@ -156,7 +166,7 @@ class HFPersistenceManager:
|
|
| 156 |
|
| 157 |
# Recreate dataset
|
| 158 |
print('π¨ Recreating dataset...')
|
| 159 |
-
api.create_repo(repo_id=self.dataset_id, repo_type='dataset', exist_ok=True)
|
| 160 |
print('β Dataset recreated successfully')
|
| 161 |
|
| 162 |
# Restore backed up files
|
|
@@ -202,7 +212,7 @@ class HFPersistenceManager:
|
|
| 202 |
|
| 203 |
# Recreate dataset
|
| 204 |
print('π¨ Recreating dataset...')
|
| 205 |
-
api.create_repo(repo_id=self.dataset_id, repo_type='dataset', exist_ok=True)
|
| 206 |
print('β Dataset recreated successfully')
|
| 207 |
|
| 208 |
print('π Dataset recreation and LFS cleanup completed!')
|
|
|
|
| 68 |
"""
|
| 69 |
try:
|
| 70 |
api = self._get_api()
|
| 71 |
+
|
| 72 |
+
# Ensure dataset exists and is private before uploading
|
| 73 |
+
try:
|
| 74 |
+
api.repo_info(repo_id=self.dataset_id, repo_type='dataset')
|
| 75 |
+
print(f'β Dataset exists: {self.dataset_id}')
|
| 76 |
+
except Exception:
|
| 77 |
+
print(f'π Dataset does not exist, creating private dataset: {self.dataset_id}')
|
| 78 |
+
api.create_repo(repo_id=self.dataset_id, repo_type='dataset', private=True)
|
| 79 |
+
print(f'β Private dataset created: {self.dataset_id}')
|
| 80 |
+
|
| 81 |
api.upload_file(
|
| 82 |
path_or_fileobj=local_path,
|
| 83 |
path_in_repo=remote_path,
|
|
|
|
| 166 |
|
| 167 |
# Recreate dataset
|
| 168 |
print('π¨ Recreating dataset...')
|
| 169 |
+
api.create_repo(repo_id=self.dataset_id, repo_type='dataset', exist_ok=True, private=True)
|
| 170 |
print('β Dataset recreated successfully')
|
| 171 |
|
| 172 |
# Restore backed up files
|
|
|
|
| 212 |
|
| 213 |
# Recreate dataset
|
| 214 |
print('π¨ Recreating dataset...')
|
| 215 |
+
api.create_repo(repo_id=self.dataset_id, repo_type='dataset', exist_ok=True, private=True)
|
| 216 |
print('β Dataset recreated successfully')
|
| 217 |
|
| 218 |
print('π Dataset recreation and LFS cleanup completed!')
|