Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import faiss | |
| import numpy as np | |
| # ---------- LOAD SETTINGS ---------- | |
def load_settings(path="config/settings.json"):
    """Read the JSON settings file and return its parsed content.

    Args:
        path: Location of the settings file. Defaults to
            ``config/settings.json`` (resolved against the current working
            directory), which is the path the script has always used.

    Returns:
        The object decoded from the JSON document.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
# Settings are read once, when this module is imported; load_settings()
# raises if the settings file cannot be opened or parsed.
SETTINGS = load_settings()
# The three keys below are required: a missing one raises KeyError at import.
EMBEDDING_MODEL = SETTINGS["embedding_model"]  # passed to SentenceTransformer in main()
FAISS_INDEX_PATH = SETTINGS["faiss_index_path"]  # where main() writes the FAISS index
METADATA_PATH = SETTINGS["metadata_path"]  # where main() writes the metadata JSON
# Directory whose sub-directories load_chunks() scans for chunks.json files.
CONDITIONS_DIR = "Conditions"
# Parent directory of the index file ("" when FAISS_INDEX_PATH is a bare filename).
INDEX_DIR = os.path.dirname(FAISS_INDEX_PATH)
| # ---------- LOAD CHUNKS ---------- | |
def load_chunks():
    """Collect chunk texts and their metadata from every condition directory.

    Each sub-directory of ``CONDITIONS_DIR`` that contains a ``chunks.json``
    file contributes its chunks; that file is expected to hold a JSON array
    of objects. Entries that are not directories, and directories without a
    ``chunks.json``, are skipped.

    Sub-directories are visited in sorted name order. ``os.listdir`` returns
    entries in arbitrary order, so without sorting the order of the returned
    lists (and of any index built from them) could differ between runs or
    machines.

    Returns:
        A ``(texts, metadatas)`` tuple of two parallel lists. ``texts[i]`` is
        the ``"text"`` value of the i-th chunk and ``metadatas[i]`` is a dict
        with that chunk's ``"condition"``, ``"section"`` and ``"source_id"``
        values (``None`` for any that are absent).

    Raises:
        FileNotFoundError: If ``CONDITIONS_DIR`` does not exist.
        KeyError: If a chunk has no ``"text"`` key.
    """
    texts = []
    metadatas = []

    for condition in sorted(os.listdir(CONDITIONS_DIR)):
        cond_path = os.path.join(CONDITIONS_DIR, condition)
        if not os.path.isdir(cond_path):
            continue

        chunks_path = os.path.join(cond_path, "chunks.json")
        if not os.path.exists(chunks_path):
            continue

        with open(chunks_path, "r", encoding="utf-8") as f:
            chunks = json.load(f)

        for chunk in chunks:
            # "text" is mandatory (KeyError if missing); metadata is optional.
            texts.append(chunk["text"])
            metadatas.append({
                "condition": chunk.get("condition"),
                "section": chunk.get("section"),
                "source_id": chunk.get("source_id")
            })

    return texts, metadatas
| # ---------- BUILD INDEX ---------- | |
def main():
    """Build the FAISS index and its metadata file from the chunk files.

    Loads every chunk via ``load_chunks()``, embeds the chunk texts with the
    SentenceTransformer model named by ``EMBEDDING_MODEL``, adds the vectors
    to a flat L2 FAISS index, writes that index to ``FAISS_INDEX_PATH`` and
    dumps the parallel metadata list as JSON to ``METADATA_PATH``.

    Raises:
        RuntimeError: If no chunks were found.
    """
    print("🔨 Building FAISS index...")

    # Check for input first: loading the embedding model is the expensive
    # step, so an empty corpus should fail before it happens.
    texts, metadatas = load_chunks()
    if not texts:
        raise RuntimeError("No chunks found. Ensure Conditions/*/chunks.json exists.")
    print(f"Loaded {len(texts)} chunks")

    # Imported here so sentence_transformers is only loaded when main() runs.
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer(EMBEDDING_MODEL)
    embeddings = model.encode(texts, show_progress_bar=True)
    # FAISS indexes operate on float32 matrices.
    embeddings = np.asarray(embeddings, dtype="float32")

    # Index dimensionality is the embedding width (second axis).
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Make sure the parent directory of each output file exists. dirname is
    # "" for a bare filename and os.makedirs("") raises, so skip empty names.
    for out_dir in (INDEX_DIR, os.path.dirname(METADATA_PATH)):
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    faiss.write_index(index, FAISS_INDEX_PATH)
    with open(METADATA_PATH, "w", encoding="utf-8") as f:
        json.dump(metadatas, f, indent=2)

    print("✅ FAISS index built successfully")
# Run the index build only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()