Spaces:
Running
Running
fix: Use HfApi.upload_file for more reliable qlog uploads
Browse files
Changed from HfFileSystem (which had issues with append mode) to
HfApi.upload_file, which is more reliable:
- Downloads existing content first
- Appends new entries
- Uploads combined content with commit message
Added more detailed logging to track upload progress.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
src/rag_chatbot/qlog/hf_writer.py
CHANGED
|
@@ -542,7 +542,7 @@ class HFDatasetWriter:
|
|
| 542 |
try:
|
| 543 |
# Lazy import huggingface_hub
|
| 544 |
from huggingface_hub import ( # type: ignore[attr-defined]
|
| 545 |
-
|
| 546 |
)
|
| 547 |
|
| 548 |
# Build JSONL content from entries
|
|
@@ -553,22 +553,52 @@ class HFDatasetWriter:
|
|
| 553 |
|
| 554 |
jsonl_content = "\n".join(jsonl_lines) + "\n"
|
| 555 |
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
|
| 561 |
-
#
|
| 562 |
-
|
| 563 |
try:
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
|
| 573 |
except ImportError:
|
| 574 |
logger.exception("huggingface_hub not installed, cannot upload logs")
|
|
|
|
| 542 |
try:
|
| 543 |
# Lazy import huggingface_hub
|
| 544 |
from huggingface_hub import ( # type: ignore[attr-defined]
|
| 545 |
+
HfApi as _HfApi,
|
| 546 |
)
|
| 547 |
|
| 548 |
# Build JSONL content from entries
|
|
|
|
| 553 |
|
| 554 |
jsonl_content = "\n".join(jsonl_lines) + "\n"
|
| 555 |
|
| 556 |
+
logger.info(
|
| 557 |
+
"Uploading %d log entries to repo '%s'...",
|
| 558 |
+
len(entries),
|
| 559 |
+
self._repo_id,
|
| 560 |
+
)
|
| 561 |
+
|
| 562 |
+
# Use HfApi.upload_file for more reliable uploads
|
| 563 |
+
# upload_file writes the whole file (creating it if missing); appending is done below by downloading the existing content first
|
| 564 |
+
api = _HfApi(token=self._hf_token)
|
| 565 |
|
| 566 |
+
# First, try to download existing content to append
|
| 567 |
+
existing_content = ""
|
| 568 |
try:
|
| 569 |
+
from huggingface_hub import hf_hub_download
|
| 570 |
+
|
| 571 |
+
local_path = hf_hub_download(
|
| 572 |
+
repo_id=self._repo_id,
|
| 573 |
+
filename=QLOG_FILENAME,
|
| 574 |
+
repo_type="dataset",
|
| 575 |
+
token=self._hf_token,
|
| 576 |
+
)
|
| 577 |
+
with open(local_path, "r", encoding="utf-8") as f:
|
| 578 |
+
existing_content = f.read()
|
| 579 |
+
logger.debug("Loaded existing qlog with %d bytes", len(existing_content))
|
| 580 |
+
except Exception:
|
| 581 |
+
# File doesn't exist yet, that's OK
|
| 582 |
+
logger.debug("No existing qlog file, will create new one")
|
| 583 |
+
|
| 584 |
+
# Combine existing content with new entries
|
| 585 |
+
combined_content = existing_content + jsonl_content
|
| 586 |
+
|
| 587 |
+
# Upload the combined content
|
| 588 |
+
api.upload_file(
|
| 589 |
+
path_or_fileobj=combined_content.encode("utf-8"),
|
| 590 |
+
path_in_repo=QLOG_FILENAME,
|
| 591 |
+
repo_id=self._repo_id,
|
| 592 |
+
repo_type="dataset",
|
| 593 |
+
commit_message=f"Add {len(entries)} query log entries",
|
| 594 |
+
)
|
| 595 |
+
|
| 596 |
+
logger.info(
|
| 597 |
+
"Successfully uploaded %d entries to %s/%s",
|
| 598 |
+
len(entries),
|
| 599 |
+
self._repo_id,
|
| 600 |
+
QLOG_FILENAME,
|
| 601 |
+
)
|
| 602 |
|
| 603 |
except ImportError:
|
| 604 |
logger.exception("huggingface_hub not installed, cannot upload logs")
|