wwforonce committed on
Commit
2a65be6
·
1 Parent(s): c3f29db

fix dataset upload and download

Browse files
Files changed (1) hide show
  1. sync_storage.py +17 -1
sync_storage.py CHANGED
@@ -223,10 +223,25 @@ This repository is automatically managed by the Open WebUI sync system.
223
  return
224
  # Try to download the latest data archive
225
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  # First try the latest link
227
  file_path = self.api.hf_hub_download(
228
  repo_id=self.repo_id,
229
- filename=self.latest_link,
230
  repo_type="dataset",
231
  token=self.token
232
  )
@@ -379,6 +394,7 @@ This repository is automatically managed by the Open WebUI sync system.
379
 
380
  logger.info(f"Found {len(current_files)} files to preserve.")
381
  archive_filenames = [i["filename"] for i in metadata["archives"]]
 
382
  logger.info(f"Archive filenames: {archive_filenames}")
383
 
384
  # 2. Download all current files to a temporary directory
 
223
  return
224
  # Try to download the latest data archive
225
  try:
226
+ metadata = self._get_metadata()
227
+
228
+ logger.info(f"Found {len(current_files)} files to preserve.")
229
+ archive_filenames = [i["filename"] for i in metadata["archives"]]
230
+ current_files = list_repo_files(self.repo_id, repo_type="dataset", token=self.token)
231
+ logger.info(f"Archive filenames: {archive_filenames}")
232
+ logger.info(f"Current filenames: {current_files}")
233
+ latest_link = self.latest_link
234
+ if (latest_link not in current_files) and (len(archive_filenames) > 0):
235
+ latest_link = archive_filenames[0]
236
+ logger.info(f"Latest link not found, falling back to first archive: {latest_link}")
237
+ else:
238
+ logger.error("No archives found in repository")
239
+ return
240
+ logger.info(f"Downloading latest data archive: {latest_link}")
241
  # First try the latest link
242
  file_path = self.api.hf_hub_download(
243
  repo_id=self.repo_id,
244
+ filename=latest_link,
245
  repo_type="dataset",
246
  token=self.token
247
  )
 
394
 
395
  logger.info(f"Found {len(current_files)} files to preserve.")
396
  archive_filenames = [i["filename"] for i in metadata["archives"]]
397
+ archive_filenames.append(self.latest_link) # Also preserve latest link
398
  logger.info(f"Archive filenames: {archive_filenames}")
399
 
400
  # 2. Download all current files to a temporary directory