# ai-pipeline-assignment / rag_node.py
# Satyam0077's picture
# Update rag_node.py
# d749425 verified
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader
from vectorstore import insert_document, search_query, create_collection
from llm_node import refine_answer
def load_pdf(pdf_path: str):
    """Load a PDF, split its text into chunks, and index every chunk.

    The text of every page is extracted, the pages are joined into one
    string, and that string is split into overlapping chunks (500
    characters with a 50-character overlap). The collection is then created
    through ``create_collection`` and each chunk is passed to
    ``insert_document`` together with its position in the chunk list.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        None. The function is called for its indexing side effect.
    """
    reader = PdfReader(pdf_path)

    # Pages are joined with a newline rather than concatenated directly:
    # without a separator the last word of one page fuses with the first
    # word of the next, corrupting the text that ends up in the chunks.
    # Pages that yield no text (e.g. image-only pages) are skipped.
    page_texts = [
        extracted
        for extracted in (page.extract_text() for page in reader.pages)
        if extracted
    ]
    text = "\n".join(page_texts)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    chunks = splitter.split_text(text)

    # The collection is created even when no text was extracted, so a later
    # search runs against an existing (possibly empty) collection.
    create_collection()

    # NOTE(review): the chunk index is presumably used as the document id by
    # insert_document -- confirm against the vectorstore module.
    for i, chunk in enumerate(chunks):
        insert_document(chunk, i)
def rag_answer(query: str) -> str:
    """Answer ``query`` from retrieved document passages.

    The query is sent to ``search_query``; when nothing comes back, a fixed
    "no relevant information" message is returned. Otherwise the retrieved
    passages are joined with newlines, embedded into an instruction prompt
    together with the question, and the prompt is handed to
    ``refine_answer``, whose result is returned unchanged.

    The prompt instructs the model to answer only from the context, to use
    a numbered list for step/process questions, a concise paragraph for
    summaries, and a direct answer for factual questions.
    """
    passages = search_query(query)

    # Guard clause: without retrieved passages there is nothing to ground
    # an answer on, so the model is not called at all.
    if not passages:
        return "No relevant information found in the document."

    retrieved = "\n".join(passages)

    # The prompt lines are deliberately flush-left: any leading whitespace
    # inside the triple-quoted string would become part of the prompt.
    instructions = f"""
You are a professional AI assistant answering questions strictly using the given context.
Rules:
- Use ONLY the provided context.
- Do NOT repeat the full document.
- Be precise and useful.
- If the question asks for steps or a process, return a numbered list.
- If the question asks for a summary, provide a concise paragraph.
- If the question is factual, answer directly.
- Do NOT hallucinate.
Context:
{retrieved}
Question:
{query}
Answer:
"""
    answer = refine_answer(instructions)
    return answer