from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader

from llm_node import refine_answer
from vectorstore import create_collection, insert_document, search_query


def load_pdf(pdf_path: str, *, chunk_size: int = 500, chunk_overlap: int = 50) -> None:
    """Load a PDF, split its text into chunks, and store every chunk.

    The text of each page is extracted with ``pypdf``; pages that yield no
    text are skipped. The remaining page texts are joined with a newline,
    split with ``RecursiveCharacterTextSplitter``, and each chunk is passed
    to ``insert_document`` together with its zero-based index.

    Args:
        pdf_path: Path of the PDF file to ingest.
        chunk_size: Maximum chunk size handed to the splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter.
    """
    reader = PdfReader(pdf_path)

    # Join pages with a newline instead of concatenating them back-to-back:
    # without a separator the last word of one page is fused with the first
    # word of the next, which corrupts the text that ends up in the chunks.
    page_texts = []
    for page in reader.pages:
        extracted = page.extract_text()
        if extracted:
            page_texts.append(extracted)
    text = "\n".join(page_texts)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_text(text)

    # NOTE(review): create_collection() is called on every load, even when
    # the PDF produced no chunks. Whether it resets an existing collection
    # is defined in vectorstore -- confirm that this is intended.
    create_collection()
    for i, chunk in enumerate(chunks):
        insert_document(chunk, i)


def rag_answer(query: str) -> str:
    """Answer ``query`` using only context retrieved from the vector store.

    The prompt instructs the model to handle factual questions,
    step-by-step (SOP) questions, and summary / explanation questions.

    Args:
        query: The user's question.

    Returns:
        The text returned by ``refine_answer`` for the assembled prompt, or a
        fixed fallback message when ``search_query`` returns nothing.
    """
    # presumably search_query returns a list of text snippets (they are
    # joined as strings below) -- verify against vectorstore.
    docs = search_query(query)
    if not docs:
        return "No relevant information found in the document."

    context = "\n".join(docs)

    prompt = f"""
You are a professional AI assistant answering questions strictly using the given context.

Rules:
- Use ONLY the provided context.
- Do NOT repeat the full document.
- Be precise and useful.
- If the question asks for steps or a process, return a numbered list.
- If the question asks for a summary, provide a concise paragraph.
- If the question is factual, answer directly.
- Do NOT hallucinate.

Context:
{context}

Question:
{query}

Answer:
"""

    return refine_answer(prompt)