Spaces:

Dinesh310
/

Demo_1

Sleeping

Dinesh310 committed on Jan 25

Commit

160beae

verified ·

1 Parent(s): f52da2a

Update src/ingestion/document_processor.py

Files changed (1) hide show

src/ingestion/document_processor.py CHANGED Viewed

@@ -8,19 +8,22 @@ class DocumentProcessor:
         self.embeddings = embeddings
     def process_pdfs(self, pdf_paths):
-        try:
-            documents = []
-            for path in pdf_paths:
-                loader = PyPDFLoader(path)
-                documents.extend(loader.load())
-            splitter = RecursiveCharacterTextSplitter(
-                chunk_size=Config.CHUNK_SIZE,
-                chunk_overlap=Config.CHUNK_OVERLAP
-            )
-            splits = splitter.split_documents(documents)
-            return FAISS.from_documents(splits, self.embeddings)
-        except Exception as e:
-            raise RuntimeError(f"Document processing failed: {e}")

         self.embeddings = embeddings
     def process_pdfs(self, pdf_paths):
+        documents = []
+        for path in pdf_paths:
+            loader = PyPDFLoader(path)
+            docs = loader.load()
+            # Ensure source filename exists
+            for d in docs:
+                d.metadata["source"] = path.split("/")[-1]
+            documents.extend(docs)
+        splitter = RecursiveCharacterTextSplitter(
+            chunk_size=Config.CHUNK_SIZE,
+            chunk_overlap=Config.CHUNK_OVERLAP
+        )
+        splits = splitter.split_documents(documents)
+        return FAISS.from_documents(splits, self.embeddings)