Create rag_engine.py
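Adds the core RAG engine behind the project-analyst app: PDFs are loaded with PyPDFLoader, split into 1000-character chunks with 200-character overlap, embedded and indexed in a FAISS vector store, and queried through an LCEL chain that returns both a grounded answer and the source chunks it was built from.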
src/rag_engine.py  ADDED  (+65 -0)
@@ -0,0 +1,65 @@
+import os
+from langchain_community.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI
+from langchain_community.vectorstores import FAISS
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough, RunnableParallel
+
+class ProjectRAGEngine:
+    def __init__(self, api_key):
+        self.embeddings = OpenAIEmbeddings(openai_api_key=api_key)
+        self.llm = ChatOpenAI(model="gpt-4o", openai_api_key=api_key, temperature=0)
+        self.vector_store = None
+
+    def process_documents(self, pdf_paths):
+        all_docs = []
+        for path in pdf_paths:
+            try:
+                loader = PyPDFLoader(path)
+                docs = loader.load()
+                all_docs.extend(docs)
+            except Exception as e:
+                print(f"Error loading {path}: {e}")
+
+        # Splitting logic to handle large reports [cite: 10]
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        splits = text_splitter.split_documents(all_docs)
+        self.vector_store = FAISS.from_documents(splits, self.embeddings)
+
+    def _format_docs(self, docs):
+        return "\n\n".join(doc.page_content for doc in docs)
+
+    def get_answer(self, query):
+        if not self.vector_store:
+            return "Please upload documents first.", []
+
+        # System prompt ensuring grounded responses [cite: 18, 25]
+        template = """
+        You are a professional Project Analyst. Answer strictly based on the provided context.
+        If the answer is not in the context, say you don't know.
+        Cite document names and page numbers for every answer. Include direct quotes.
+
+        Context: {context}
+        Question: {question}
+        """
+        prompt = ChatPromptTemplate.from_template(template)
+        retriever = self.vector_store.as_retriever(search_kwargs={"k": 5})
+
+        # Pure LCEL chain composition
+        rag_chain_from_docs = (
+            RunnablePassthrough.assign(context=(lambda x: self._format_docs(x["context"])))
+            | prompt
+            | self.llm
+            | StrOutputParser()
+        )
+
+        rag_chain_with_source = RunnableParallel(
+            {"context": retriever, "question": RunnablePassthrough()}
+        ).assign(answer=rag_chain_from_docs)
+
+        result = rag_chain_with_source.invoke(query)
+
+        sources = [{"content": doc.page_content, "metadata": doc.metadata} for doc in result["context"]]
+        return result["answer"], sources
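For reviewers, a minimal driver sketch showing how the class is intended to be called. This is not part of the commit: the PDF paths, the question, and reading the key from the OPENAI_API_KEY environment variable are illustrative assumptions.

# Hypothetical usage sketch, not part of this commit.
# Assumes OPENAI_API_KEY is set and the example PDF paths exist.
import os

from src.rag_engine import ProjectRAGEngine

engine = ProjectRAGEngine(api_key=os.environ["OPENAI_API_KEY"])
engine.process_documents(["reports/q1_status.pdf", "reports/risk_register.pdf"])

answer, sources = engine.get_answer("What are the main schedule risks?")
print(answer)
for src in sources:
    # PyPDFLoader records the originating file and page number in metadata
    print(src["metadata"].get("source"), src["metadata"].get("page"))

Note the design choice in get_answer: because RunnableParallel routes the same retrieved documents to both the answer chain and the returned sources list, the citations handed back to the caller are exactly the chunks the model saw.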