switch to text-only dataset for offline build
Browse files - cache_ds.py +10 -3
cache_ds.py
CHANGED
|
@@ -1,5 +1,12 @@
|
|
| 1 |
# cache_ds.py
|
|
|
|
| 2 |
from datasets import load_dataset
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# cache_ds.py
|
| 2 |
+
# cache_ds.py – build-time pre-cache (runs ONCE on HF builder)
|
| 3 |
from datasets import load_dataset
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
|
| 6 |
+
# 1. plain JSON dataset – zero custom features
|
| 7 |
+
ds = load_dataset("NimrodDev/LD_Events_TEXT", split="train", keep_in_memory=True)
|
| 8 |
+
print("✓ Text dataset cached at build time")
|
| 9 |
+
|
| 10 |
+
# 2. embedding model
|
| 11 |
+
SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 12 |
+
print("✓ Embedding model cached at build time")
|