NimrodDev committed on
Commit
3ab9e0a
·
1 Parent(s): 13a37a9
Files changed (2) hide show
  1. install_cache.sh +15 -0
  2. rag.py +7 -15
install_cache.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # install_cache.sh –– runs ONCE during HF build (online) β†’ caches into ./.cache
3
+ set -e
4
+ python - <<'PY'
5
+ from datasets import load_dataset
6
+ from sentence_transformers import SentenceTransformer
7
+
8
+ # 1. download plain text dataset (online, build-time only)
9
+ ds = load_dataset("NimrodDev/LD_Events_TEXT", split="train")
10
+ print("βœ“ Dataset cached at build time")
11
+
12
+ # 2. download embedding model (online, build-time only)
13
+ SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
14
+ print("βœ“ Embedding model cached at build time")
15
+ PY
rag.py CHANGED
@@ -15,7 +15,7 @@ from supabase import create_client
15
  # ------------------------------------------------------------------
16
  # CONFIG
17
  # ------------------------------------------------------------------
18
- HF_DS = "NimrodDev/LD_Events2" # parquet branch auto-converted
19
  EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
20
  LLM_MODEL = "microsoft/DialoGPT-medium"
21
  SUPABASE_URL = os.getenv("SUPABASE_URL")
@@ -97,30 +97,22 @@ def _fallback_answer(company: str, intent: str) -> str:
97
  # ------------------------------------------------------------------
98
  # RAM-ONLY DOCUMENT LOADER – OFF-LINE / PRE-CACHED
99
  # ------------------------------------------------------------------
 
100
  def load_texts() -> List[str]:
101
- # offline + trust_remote_code=False -> no write, no download
102
- ds = load_dataset(
103
- HF_DS,
104
- revision="refs/convert/parquet",
105
- split="train",
106
- trust_remote_code=False,
107
- keep_in_memory=True # force RAM, no disk touch
108
- )
109
  return [row["text"] for row in ds if row.get("text")]
110
 
111
- # ------------------------------------------------------------------
112
- # SINGLE-BUILD VECTOR STORE (cached for life of worker)
113
- # ------------------------------------------------------------------
114
  @lru_cache(maxsize=1)
115
  def get_vectorstore() -> FAISS:
116
  texts = load_texts()
117
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
118
  docs = splitter.create_documents(texts, metadatas=[{"source": HF_DS}] * len(texts))
119
 
120
- # tell sentence-transformers to use the pre-cached model inside the image
121
- os.environ["HF_HOME"] = "/code/.cache" # <-- NEW
122
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
123
- return FAISS.from_documents(docs, embeddings) # built ONCE per worker
124
 
125
  # ------------------------------------------------------------------
126
  # LLM
 
15
  # ------------------------------------------------------------------
16
  # CONFIG
17
  # ------------------------------------------------------------------
18
+ HF_DS = "NimrodDev/LD_Events_TEXT" # plain-text dataset pre-cached at build time
19
  EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
20
  LLM_MODEL = "microsoft/DialoGPT-medium"
21
  SUPABASE_URL = os.getenv("SUPABASE_URL")
 
97
  # ------------------------------------------------------------------
98
  # RAM-ONLY DOCUMENT LOADER – OFF-LINE / PRE-CACHED
99
  # ------------------------------------------------------------------
100
+
101
  def load_texts() -> List[str]:
102
+ # offline + in-memory β†’ no write, no download at run-time
103
+ ds = load_dataset(HF_DS, split="train", keep_in_memory=True, trust_remote_code=False)
 
 
 
 
 
 
104
  return [row["text"] for row in ds if row.get("text")]
105
 
 
 
 
106
  @lru_cache(maxsize=1)
107
  def get_vectorstore() -> FAISS:
108
  texts = load_texts()
109
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
110
  docs = splitter.create_documents(texts, metadatas=[{"source": HF_DS}] * len(texts))
111
 
112
+ # force embeddings to use the pre-cached model dir (read-only)
113
+ os.environ["HF_HOME"] = "/code/.cache"
114
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
115
+ return FAISS.from_documents(docs, embeddings) # # built ONCE per worker
116
 
117
  # ------------------------------------------------------------------
118
  # LLM