Dinesh310 committed on
Commit
5c65806
·
verified ·
1 Parent(s): ae8a6c0

Create vector_store/vector_store.py

Browse files
Files changed (1) hide show
  1. src/vector_store/vector_store.py +31 -0
src/vector_store/vector_store.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/vector_store/vector_store.py
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain_community.vectorstores import FAISS
5
+ from src.config import CHUNK_SIZE, CHUNK_OVERLAP
6
+ from src.exceptions import DocumentProcessingError
7
+
8
def build_vector_store(pdf_paths, embeddings, original_names=None):
    """Build a FAISS vector store from the contents of one or more PDFs.

    Every PDF is loaded with ``PyPDFLoader``, the resulting documents are
    split with a ``RecursiveCharacterTextSplitter`` configured from
    ``CHUNK_SIZE`` and ``CHUNK_OVERLAP``, and the chunks are indexed via
    ``FAISS.from_documents``.

    Args:
        pdf_paths: Iterable of PDF file paths to load.
        embeddings: Embeddings object handed unchanged to
            ``FAISS.from_documents``.
        original_names: Optional sequence of names matched to ``pdf_paths``
            by position. When an entry exists for a path, it overwrites the
            ``"source"`` metadata of every document loaded from that path.
            Paths beyond the end of this sequence keep the metadata set by
            the loader.

    Returns:
        The vector store produced by ``FAISS.from_documents``.

    Raises:
        DocumentProcessingError: If loading, splitting, or indexing fails,
            or if no text chunks could be produced from the given PDFs.
            The underlying exception, when there is one, is attached as
            ``__cause__``.
    """
    try:
        all_docs = []

        for index, path in enumerate(pdf_paths):
            docs = PyPDFLoader(path).load()

            # Replace the on-disk path with the caller-supplied name so that
            # downstream consumers of the metadata see that name instead.
            if original_names and index < len(original_names):
                for doc in docs:
                    doc.metadata["source"] = original_names[index]

            all_docs.extend(docs)

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP,
        )
        splits = splitter.split_documents(all_docs)

        # Fail with a clear message instead of handing an empty list to the
        # index builder (covers both "no paths" and "no extractable text").
        if not splits:
            raise DocumentProcessingError(
                "PDF processing failed: no text content found in the provided PDFs"
            )

        return FAISS.from_documents(splits, embeddings)

    except DocumentProcessingError:
        # Already the domain error; do not wrap it a second time.
        raise
    except Exception as e:
        # Broad catch is deliberate: callers rely on a single error type.
        # Chain explicitly so the original traceback is kept as the cause.
        raise DocumentProcessingError(f"PDF processing failed: {e}") from e