Spaces:
Runtime error
Runtime error
| from pathlib import Path | |
| from pypdf import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
| import faiss, pickle | |
# Directory scanned for input ``*.pdf`` files.
PDF_DIR = Path("data/pdfs")
# Directory that receives the FAISS index and the pickled page texts.
INDEX_DIR = Path("index")
# Sentence-embedding model, loaded once at import time and used by ingest().
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
def ingest():
    """Build and persist a FAISS index over the text of the PDFs in ``PDF_DIR``.

    Every page that yields non-empty extracted text becomes one document.
    The page texts are embedded with the module-level ``model``, added to a
    flat L2 index, and two artifacts are written under ``INDEX_DIR``:

    * ``esg.faiss`` -- the serialized FAISS index.
    * ``docs.pkl``  -- the pickled list of page texts, in the same order in
      which they were passed to the encoder.

    Returns:
        None. If no page produced any text, nothing is written.
    """
    texts = []
    # glob() yields files in arbitrary, filesystem-dependent order; sorting
    # keeps the index rows and docs.pkl reproducible from run to run.
    for pdf_path in sorted(PDF_DIR.glob("*.pdf")):
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            page_text = page.extract_text()
            # Pages with no extractable text (None or "") are skipped.
            if page_text:
                texts.append(page_text)

    if not texts:
        return

    embeddings = model.encode(texts, show_progress_bar=False)
    # Index dimensionality is taken from the embedding width.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    INDEX_DIR.mkdir(parents=True, exist_ok=True)
    faiss.write_index(index, str(INDEX_DIR / "esg.faiss"))
    # Context manager guarantees the pickle is flushed and the handle closed
    # even if dump() raises.
    with open(INDEX_DIR / "docs.pkl", "wb") as docs_file:
        pickle.dump(texts, docs_file)
# Run the ingestion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    ingest()