Spaces:
Paused
Paused
fix dataset upload and download
Browse files
- sync_storage.py +17 -1
sync_storage.py
CHANGED
|
@@ -223,10 +223,25 @@ This repository is automatically managed by the Open WebUI sync system.
|
|
| 223 |
return
|
| 224 |
# Try to download the latest data archive
|
| 225 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
# First try the latest link
|
| 227 |
file_path = self.api.hf_hub_download(
|
| 228 |
repo_id=self.repo_id,
|
| 229 |
-
filename=
|
| 230 |
repo_type="dataset",
|
| 231 |
token=self.token
|
| 232 |
)
|
|
@@ -379,6 +394,7 @@ This repository is automatically managed by the Open WebUI sync system.
|
|
| 379 |
|
| 380 |
logger.info(f"Found {len(current_files)} files to preserve.")
|
| 381 |
archive_filenames = [i["filename"] for i in metadata["archives"]]
|
|
|
|
| 382 |
logger.info(f"Archive filenames: {archive_filenames}")
|
| 383 |
|
| 384 |
# 2. Download all current files to a temporary directory
|
|
|
|
| 223 |
return
|
| 224 |
# Try to download the latest data archive
|
| 225 |
try:
|
| 226 |
+
metadata = self._get_metadata()
|
| 227 |
+
|
| 228 |
+
logger.info(f"Found {len(current_files)} files to preserve.")
|
| 229 |
+
archive_filenames = [i["filename"] for i in metadata["archives"]]
|
| 230 |
+
current_files = list_repo_files(self.repo_id, repo_type="dataset", token=self.token)
|
| 231 |
+
logger.info(f"Archive filenames: {archive_filenames}")
|
| 232 |
+
logger.info(f"Current filenames: {current_files}")
|
| 233 |
+
latest_link = self.latest_link
|
| 234 |
+
if (latest_link not in current_files) and (len(archive_filenames) > 0):
|
| 235 |
+
latest_link = archive_filenames[0]
|
| 236 |
+
logger.info(f"Latest link not found, falling back to first archive: {latest_link}")
|
| 237 |
+
else:
|
| 238 |
+
logger.error("No archives found in repository")
|
| 239 |
+
return
|
| 240 |
+
logger.info(f"Downloading latest data archive: {latest_link}")
|
| 241 |
# First try the latest link
|
| 242 |
file_path = self.api.hf_hub_download(
|
| 243 |
repo_id=self.repo_id,
|
| 244 |
+
filename=latest_link,
|
| 245 |
repo_type="dataset",
|
| 246 |
token=self.token
|
| 247 |
)
|
|
|
|
| 394 |
|
| 395 |
logger.info(f"Found {len(current_files)} files to preserve.")
|
| 396 |
archive_filenames = [i["filename"] for i in metadata["archives"]]
|
| 397 |
+
archive_filenames.append(self.latest_link) # Also preserve latest link
|
| 398 |
logger.info(f"Archive filenames: {archive_filenames}")
|
| 399 |
|
| 400 |
# 2. Download all current files to a temporary directory
|