Heng2004 committed on
Commit
405e720
·
verified ·
1 Parent(s): 11d64bd

Update loader.py

Browse files
Files changed (1) hide show
  1. loader.py +45 -0
loader.py CHANGED
@@ -18,6 +18,51 @@ MANUAL_QA_PATH = os.path.join(DATA_DIR, "manual_qa.jsonl")
18
 
19
  GLOSSARY_PATH = os.path.join(DATA_DIR, "glossary.jsonl")
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def sync_upload_manual_qa() -> str:
23
  """
 
18
 
19
  GLOSSARY_PATH = os.path.join(DATA_DIR, "glossary.jsonl")
20
 
21
+ # Filename and local path of the embedding cache that is synced with the dataset repo
22
+ CACHE_FILENAME = "cached_embeddings.pt"
23
+ CACHE_PATH = os.path.join(DATA_DIR, CACHE_FILENAME)
24
+
25
def sync_upload_cache() -> str:
    """Push the local embedding cache file to the Hugging Face dataset repo.

    Returns:
        A status string meant for display: a skip notice when
        ``DATASET_REPO_ID`` is empty or still contains the ``YOUR_USERNAME``
        placeholder, a success message once the upload call returns, or a
        failure message embedding the error text. Errors are printed and
        reported through the return value instead of being raised.
    """
    repo_configured = bool(DATASET_REPO_ID) and "YOUR_USERNAME" not in DATASET_REPO_ID
    if not repo_configured:
        return "⚠️ Upload Skipped (Repo ID not set)"

    print(f"[INFO] Uploading {CACHE_FILENAME}...")
    try:
        # Imported inside the try so that a failing import is reported
        # through the same status string as any other upload failure.
        from huggingface_hub import HfApi

        hub_client = HfApi()
        hub_client.upload_file(
            path_or_fileobj=CACHE_PATH,
            path_in_repo=CACHE_FILENAME,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            commit_message="System: Updated embedding cache",
        )
    except Exception as err:
        print(f"[ERROR] Upload cache failed: {err}")
        return f"⚠️ Cache Upload Failed: {err}"
    return "☁️ Cache Upload Success"
45
+
46
def sync_download_cache() -> None:
    """Download the embedding cache file from the dataset repo at startup.

    The file named by ``CACHE_FILENAME`` is fetched from the Hugging Face
    dataset ``DATASET_REPO_ID`` and copied to ``CACHE_PATH``. Nothing is
    attempted when the repo ID is empty or still contains the
    ``YOUR_USERNAME`` placeholder (the same rule ``sync_upload_cache``
    applies). Any failure is printed as a warning and swallowed, so a
    missing cache never prevents startup.
    """
    # Skip placeholder repo IDs as well as empty ones; a download from a
    # placeholder repo can only fail after a pointless network round-trip.
    if not DATASET_REPO_ID or "YOUR_USERNAME" in DATASET_REPO_ID:
        return

    print(f"[INFO] Downloading {CACHE_FILENAME}...")
    try:
        from huggingface_hub import hf_hub_download
        import shutil

        downloaded_path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=CACHE_FILENAME,
            repo_type="dataset",
            token=os.environ.get("HF_TOKEN"),
        )

        # Make sure the destination directory exists; otherwise the copy
        # fails and is misreported as a download problem.
        target_dir = os.path.dirname(CACHE_PATH)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        shutil.copy(downloaded_path, CACHE_PATH)
        print("[INFO] Cache download success!")
    except Exception as e:
        # Best-effort: on a first run the cache file may not exist in the repo.
        print(f"[WARN] Could not download cache (First run?): {e}")
66
 
67
  def sync_upload_manual_qa() -> str:
68
  """