NimrodDev committed on
Commit
fa3d68f
·
1 Parent(s): 3ab9e0a

switch to text-only dataset for offline build

Browse files
Files changed (1) hide show
  1. cache_ds.py +10 -3
cache_ds.py CHANGED
@@ -1,5 +1,12 @@
1
  # cache_ds.py
 
2
  from datasets import load_dataset
3
- # downloads & caches the parquet branch into /code/.cache
4
- load_dataset("NimrodDev/LD_Events2", revision="refs/convert/parquet", split="train")
5
- print("Dataset cached.")
 
 
 
 
 
 
 
1
  # cache_ds.py
2
+ # cache_ds.py – build-time pre-cache (runs ONCE on HF builder)
3
  from datasets import load_dataset
4
+ from sentence_transformers import SentenceTransformer
5
+
6
+ # 1. plain JSON dataset – zero custom features
7
+ ds = load_dataset("NimrodDev/LD_Events_TEXT", split="train", keep_in_memory=True)
8
+ print("✓ Text dataset cached at build time")
9
+
10
+ # 2. embedding model
11
+ SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
12
+ print("✓ Embedding model cached at build time")