EphAsad committed on
Commit
820c2fc
·
verified ·
1 Parent(s): a3b2ab5

Update embed_index.py

Browse files
Files changed (1) hide show
  1. embed_index.py +26 -9
embed_index.py CHANGED
@@ -2,13 +2,24 @@ import json
2
  import os
3
  import faiss
4
  import numpy as np
5
- from sentence_transformers import SentenceTransformer
6
 
7
- from config.settings import *
 
 
 
 
 
 
 
 
 
 
8
 
9
  CONDITIONS_DIR = "Conditions"
10
- INDEX_DIR = "index"
11
 
 
 
12
  def load_chunks():
13
  texts = []
14
  metadatas = []
@@ -28,23 +39,29 @@ def load_chunks():
28
  for chunk in chunks:
29
  texts.append(chunk["text"])
30
  metadatas.append({
31
- "condition": chunk["condition"],
32
- "section": chunk["section"],
33
- "source_id": chunk["source_id"]
34
  })
35
 
36
  return texts, metadatas
37
 
38
 
 
39
  def main():
40
- print("Loading embedding model...")
 
 
41
  model = SentenceTransformer(EMBEDDING_MODEL)
42
 
43
  texts, metadatas = load_chunks()
 
 
 
44
  print(f"Loaded {len(texts)} chunks")
45
 
46
  embeddings = model.encode(texts, show_progress_bar=True)
47
- embeddings = np.array(embeddings).astype("float32")
48
 
49
  index = faiss.IndexFlatL2(embeddings.shape[1])
50
  index.add(embeddings)
@@ -56,7 +73,7 @@ def main():
56
  with open(METADATA_PATH, "w", encoding="utf-8") as f:
57
  json.dump(metadatas, f, indent=2)
58
 
59
- print("FAISS index built successfully")
60
 
61
 
62
  if __name__ == "__main__":
 
2
  import os
3
  import faiss
4
  import numpy as np
 
5
 
6
+
7
# ---------- LOAD SETTINGS ----------
def load_settings(path="config/settings.json"):
    """Load a JSON settings file and return its contents as a dict.

    Args:
        path: Location of the settings JSON file. Defaults to the
            project's ``config/settings.json`` so existing callers
            (which pass no argument) behave exactly as before.

    Returns:
        The parsed JSON object (expected to be a dict of settings).

    Raises:
        FileNotFoundError: If the settings file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
11
+
12
+ SETTINGS = load_settings()
13
+
14
+ EMBEDDING_MODEL = SETTINGS["embedding_model"]
15
+ FAISS_INDEX_PATH = SETTINGS["faiss_index_path"]
16
+ METADATA_PATH = SETTINGS["metadata_path"]
17
 
18
  CONDITIONS_DIR = "Conditions"
19
+ INDEX_DIR = os.path.dirname(FAISS_INDEX_PATH)
20
 
21
+
22
+ # ---------- LOAD CHUNKS ----------
23
  def load_chunks():
24
  texts = []
25
  metadatas = []
 
39
  for chunk in chunks:
40
  texts.append(chunk["text"])
41
  metadatas.append({
42
+ "condition": chunk.get("condition"),
43
+ "section": chunk.get("section"),
44
+ "source_id": chunk.get("source_id")
45
  })
46
 
47
  return texts, metadatas
48
 
49
 
50
+ # ---------- BUILD INDEX ----------
51
  def main():
52
+ print("🔨 Building FAISS index...")
53
+
54
+ from sentence_transformers import SentenceTransformer
55
  model = SentenceTransformer(EMBEDDING_MODEL)
56
 
57
  texts, metadatas = load_chunks()
58
+ if not texts:
59
+ raise RuntimeError("No chunks found. Ensure Conditions/*/chunks.json exists.")
60
+
61
  print(f"Loaded {len(texts)} chunks")
62
 
63
  embeddings = model.encode(texts, show_progress_bar=True)
64
+ embeddings = np.asarray(embeddings, dtype="float32")
65
 
66
  index = faiss.IndexFlatL2(embeddings.shape[1])
67
  index.add(embeddings)
 
73
  with open(METADATA_PATH, "w", encoding="utf-8") as f:
74
  json.dump(metadatas, f, indent=2)
75
 
76
+ print("FAISS index built successfully")
77
 
78
 
79
  if __name__ == "__main__":