amalsp committed on
Commit
d91bd96
·
verified ·
1 Parent(s): e360f46

Create store_index.py

Browse files
Files changed (1) hide show
  1. store_index.py +28 -0
store_index.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Build a FAISS vector index from the PDFs under ``data/`` and save it to disk.

Running this script loads environment variables, extracts and chunks the PDF
content via the project helpers, embeds the chunks, and writes the resulting
index to the ``vector_store`` directory.
"""

from dotenv import load_dotenv
from langchain.schema import Document
from langchain.vectorstores import FAISS

from src.helper import load_pdf, text_split, download_hugging_face_embeddings

load_dotenv()

# Load and process PDF data
extracted_data = load_pdf("data/")
text_chunks = text_split(extracted_data)

# Download embeddings model
embedding_model = download_hugging_face_embeddings()

# Build the FAISS index directly from the chunks.
#
# Why not pre-compute embeddings and wrap them in new Document objects?
# ``FAISS.from_documents`` embeds the texts itself using ``embedding_model``,
# so calling ``embed_documents`` beforehand computes every vector twice, and an
# ``embedding=`` keyword passed to ``Document`` is not one of its declared
# fields, so the pre-computed vectors were never used by the index.
# Passing the chunks as-is also keeps whatever metadata they carry instead of
# discarding it.
# NOTE(review): assumes ``text_split`` returns LangChain ``Document`` objects
# (they expose ``page_content``; ``from_documents`` also reads ``metadata``)
# -- confirm against src/helper.py.
vector_store = FAISS.from_documents(text_chunks, embedding_model)

# Save the vector store to disk for later use
vector_store.save_local("vector_store")