Heng2004 committed on
Commit
8798e5f
·
verified ·
1 Parent(s): 237554a

Update loader.py

Browse files
Files changed (1) hide show
  1. loader.py +34 -0
loader.py CHANGED
@@ -4,6 +4,8 @@ import json
4
  from typing import List, Dict, Any
5
 
6
  from huggingface_hub import hf_hub_download, HfApi
 
 
7
 
8
  import qa_store
9
 
@@ -16,6 +18,38 @@ MANUAL_QA_PATH = os.path.join(DATA_DIR, "manual_qa.jsonl")
16
 
17
  GLOSSARY_PATH = os.path.join(DATA_DIR, "glossary.jsonl")
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def load_curriculum() -> None:
20
  """
21
  Load official textbook JSONL into qa_store.ENTRIES and AUTO_QA_KNOWLEDGE.
 
4
  from typing import List, Dict, Any
5
 
6
  from huggingface_hub import hf_hub_download, HfApi
7
+ DATASET_REPO_ID = "Heng2004/lao-science-qa-store"
8
+ DATASET_FILENAME = "manual_qa.jsonl"
9
 
10
  import qa_store
11
 
 
18
 
19
  GLOSSARY_PATH = os.path.join(DATA_DIR, "glossary.jsonl")
20
 
21
+
22
def sync_download_manual_qa() -> None:
    """
    Refresh the local manual Q&A file from the Hugging Face Dataset repo.

    Downloads DATASET_FILENAME from DATASET_REPO_ID (repo type "dataset")
    and installs it at MANUAL_QA_PATH. Meant to be called at startup so
    previously saved teacher edits are not lost.

    The call is best-effort and never raises:

    * If DATASET_REPO_ID is empty or still contains the "YOUR_USERNAME"
      placeholder, the download is skipped with a warning.
    * If downloading or copying fails, a warning is printed and whatever
      manual_qa.jsonl already exists locally is left untouched.

    Returns:
        None. The only result is the file written at MANUAL_QA_PATH.
    """
    import shutil  # local import: only this function needs it

    if not DATASET_REPO_ID or "YOUR_USERNAME" in DATASET_REPO_ID:
        print("[WARN] DATASET_REPO_ID is not set. Skipping download.")
        return

    print(f"[INFO] Downloading {DATASET_FILENAME} from {DATASET_REPO_ID}...")

    target_path = MANUAL_QA_PATH
    # Stage next to the target so the final os.replace() stays on one
    # filesystem and swaps the file in a single step.
    staging_path = f"{target_path}.tmp"

    try:
        # hf_hub_download returns the path of the downloaded file.
        downloaded_path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=DATASET_FILENAME,
            repo_type="dataset",
            # HF_TOKEN from the environment; None when the variable is unset.
            token=os.environ.get("HF_TOKEN"),
        )

        # The data folder may not exist yet; create it before copying.
        target_dir = os.path.dirname(target_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Copy to the staging file first, then swap it in. A copy that fails
        # halfway must not leave a truncated manual_qa.jsonl behind, because
        # the fallback below relies on the existing local file.
        shutil.copy(downloaded_path, staging_path)
        os.replace(staging_path, target_path)
        print("[INFO] Download success!")

    except Exception as e:
        # Deliberately broad: a failed sync must not stop application startup.
        print(f"[WARN] Could not download manual_qa.jsonl: {e}")
        print("[INFO] Starting with empty or local manual_qa.jsonl instead.")

    finally:
        # Remove a leftover staging file after a failed copy. On success the
        # file was already moved, so a missing file here is expected.
        try:
            os.remove(staging_path)
        except OSError:
            pass
52
+
53
  def load_curriculum() -> None:
54
  """
55
  Load official textbook JSONL into qa_store.ENTRIES and AUTO_QA_KNOWLEDGE.