sadickam Claude Opus 4.5 committed on
Commit
3b020bd
·
1 Parent(s): 605e9d9

fix: Use HfApi.upload_file for more reliable qlog uploads

Browse files

Changed from HfFileSystem (which had issues with append mode) to
HfApi.upload_file, which is more reliable:
- Downloads existing content first
- Appends new entries
- Uploads combined content with commit message

Added more detailed logging to track upload progress.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. src/rag_chatbot/qlog/hf_writer.py +45 -15
src/rag_chatbot/qlog/hf_writer.py CHANGED
@@ -542,7 +542,7 @@ class HFDatasetWriter:
542
  try:
543
  # Lazy import huggingface_hub
544
  from huggingface_hub import ( # type: ignore[attr-defined]
545
- HfFileSystem as _HfFileSystem,
546
  )
547
 
548
  # Build JSONL content from entries
@@ -553,22 +553,52 @@ class HFDatasetWriter:
553
 
554
  jsonl_content = "\n".join(jsonl_lines) + "\n"
555
 
556
- # Use HfFileSystem for append operation
557
- # This is more efficient than downloading, appending, and re-uploading
558
- fs = _HfFileSystem(token=self._hf_token)
559
- file_path = f"datasets/{self._repo_id}/{QLOG_FILENAME}"
 
 
 
 
 
560
 
561
- # Append to the file (creates if doesn't exist)
562
- # Note: HfFileSystem.open() with mode 'a' appends to existing content
563
  try:
564
- with fs.open(file_path, mode="a") as f:
565
- f.write(jsonl_content)
566
- except FileNotFoundError:
567
- # File doesn't exist, create it with write mode
568
- with fs.open(file_path, mode="w") as f:
569
- f.write(jsonl_content)
570
-
571
- logger.debug("Uploaded %d entries to %s", len(entries), file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
572
 
573
  except ImportError:
574
  logger.exception("huggingface_hub not installed, cannot upload logs")
 
542
  try:
543
  # Lazy import huggingface_hub
544
  from huggingface_hub import ( # type: ignore[attr-defined]
545
+ HfApi as _HfApi,
546
  )
547
 
548
  # Build JSONL content from entries
 
553
 
554
  jsonl_content = "\n".join(jsonl_lines) + "\n"
555
 
556
+ logger.info(
557
+ "Uploading %d log entries to repo '%s'...",
558
+ len(entries),
559
+ self._repo_id,
560
+ )
561
+
562
+ # Use HfApi.upload_file for more reliable uploads
563
+ # This creates the file if it doesn't exist and appends if it does
564
+ api = _HfApi(token=self._hf_token)
565
 
566
+ # First, try to download existing content to append
567
+ existing_content = ""
568
  try:
569
+ from huggingface_hub import hf_hub_download
570
+
571
+ local_path = hf_hub_download(
572
+ repo_id=self._repo_id,
573
+ filename=QLOG_FILENAME,
574
+ repo_type="dataset",
575
+ token=self._hf_token,
576
+ )
577
+ with open(local_path, "r", encoding="utf-8") as f:
578
+ existing_content = f.read()
579
+ logger.debug("Loaded existing qlog with %d bytes", len(existing_content))
580
+ except Exception:
581
+ # File doesn't exist yet, that's OK
582
+ logger.debug("No existing qlog file, will create new one")
583
+
584
+ # Combine existing content with new entries
585
+ combined_content = existing_content + jsonl_content
586
+
587
+ # Upload the combined content
588
+ api.upload_file(
589
+ path_or_fileobj=combined_content.encode("utf-8"),
590
+ path_in_repo=QLOG_FILENAME,
591
+ repo_id=self._repo_id,
592
+ repo_type="dataset",
593
+ commit_message=f"Add {len(entries)} query log entries",
594
+ )
595
+
596
+ logger.info(
597
+ "Successfully uploaded %d entries to %s/%s",
598
+ len(entries),
599
+ self._repo_id,
600
+ QLOG_FILENAME,
601
+ )
602
 
603
  except ImportError:
604
  logger.exception("huggingface_hub not installed, cannot upload logs")