Dinesh310 committed on
Commit
f4f294e
·
verified ·
1 Parent(s): 1925b26

Delete rag_engine.py

Browse files
Files changed (1) hide show
  1. rag_engine.py +0 -65
rag_engine.py DELETED
@@ -1,65 +0,0 @@
1
- import os
2
- from langchain_community.document_loaders import PyPDFLoader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain_openai import OpenAIEmbeddings, ChatOpenAI
5
- from langchain_community.vectorstores import FAISS
6
- from langchain_core.prompts import ChatPromptTemplate
7
- from langchain_core.output_parsers import StrOutputParser
8
- from langchain_core.runnables import RunnablePassthrough, RunnableParallel
9
-
10
class ProjectRAGEngine:
    """Retrieval-augmented QA engine over a set of PDF documents.

    Builds a FAISS vector store from uploaded PDFs and answers questions
    strictly grounded in the retrieved context, returning both the answer
    text and the source chunks that supported it.
    """

    def __init__(self, api_key, model="gpt-4o", temperature=0):
        """Create the engine.

        Args:
            api_key: OpenAI API key used for both embeddings and the chat model.
            model: Chat model name; default preserves the original behavior.
            temperature: Sampling temperature; 0 keeps answers deterministic.
        """
        self.embeddings = OpenAIEmbeddings(openai_api_key=api_key)
        self.llm = ChatOpenAI(model=model, openai_api_key=api_key, temperature=temperature)
        # Populated by process_documents(); None means "no documents indexed yet".
        self.vector_store = None

    def process_documents(self, pdf_paths, chunk_size=1000, chunk_overlap=200):
        """Load, split, and index the given PDF files into a FAISS store.

        Args:
            pdf_paths: Iterable of filesystem paths to PDF files.
            chunk_size: Splitter chunk size; default matches the original
                hard-coded value.
            chunk_overlap: Splitter overlap; default matches the original
                hard-coded value.
        """
        all_docs = []
        for path in pdf_paths:
            try:
                loader = PyPDFLoader(path)
                all_docs.extend(loader.load())
            except Exception as e:
                # Best-effort: one unreadable PDF should not abort the batch.
                print(f"Error loading {path}: {e}")

        # Guard: FAISS.from_documents raises an opaque error on an empty
        # document list (e.g. when every load above failed). Leave the store
        # untouched so get_answer() keeps returning its friendly
        # "upload first" message instead of crashing here.
        if not all_docs:
            return

        # Splitting logic to handle large reports [cite: 10]
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        splits = text_splitter.split_documents(all_docs)
        self.vector_store = FAISS.from_documents(splits, self.embeddings)

    def _format_docs(self, docs):
        """Join retrieved document chunks into a single context string."""
        return "\n\n".join(doc.page_content for doc in docs)

    def get_answer(self, query, k=5):
        """Answer *query* using only the indexed documents.

        Args:
            query: Natural-language question.
            k: Number of chunks to retrieve; default matches the original
                hard-coded value.

        Returns:
            Tuple ``(answer, sources)`` where ``sources`` is a list of
            ``{"content": ..., "metadata": ...}`` dicts for the retrieved
            chunks (empty when no documents have been indexed).
        """
        if not self.vector_store:
            return "Please upload documents first.", []

        # System prompt ensuring grounded responses [cite: 18, 25]
        template = """
        You are a professional Project Analyst. Answer strictly based on the provided context.
        If the answer is not in the context, say you don't know.
        Cite document names and page numbers for every answer. Include direct quotes.

        Context: {context}
        Question: {question}
        """
        prompt = ChatPromptTemplate.from_template(template)
        retriever = self.vector_store.as_retriever(search_kwargs={"k": k})

        # Pure LCEL chain: format retrieved docs -> prompt -> LLM -> string.
        rag_chain_from_docs = (
            RunnablePassthrough.assign(context=(lambda x: self._format_docs(x["context"])))
            | prompt
            | self.llm
            | StrOutputParser()
        )

        # Run retrieval and question pass-through in parallel, then attach
        # the generated answer while keeping the raw context for citation.
        rag_chain_with_source = RunnableParallel(
            {"context": retriever, "question": RunnablePassthrough()}
        ).assign(answer=rag_chain_from_docs)

        result = rag_chain_with_source.invoke(query)

        sources = [{"content": doc.page_content, "metadata": doc.metadata} for doc in result["context"]]
        return result["answer"], sources