Spaces:
Sleeping
Sleeping
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from pypdf import PdfReader | |
| from vectorstore import insert_document, search_query, create_collection | |
| from llm_node import refine_answer | |
def load_pdf(pdf_path: str, *, chunk_size: int = 500, chunk_overlap: int = 50) -> None:
    """Load a PDF, split its text into chunks, and store every chunk.

    The text of each page is extracted with pypdf, the pages are joined,
    the result is split with ``RecursiveCharacterTextSplitter`` and each
    chunk is handed to ``insert_document`` together with its index.
    (The vector store is presumably Qdrant, per the original note on this
    function -- confirm against the ``vectorstore`` module.)

    Args:
        pdf_path: Path of the PDF file to ingest.
        chunk_size: Maximum chunk size passed to the splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter.
    """
    reader = PdfReader(pdf_path)

    # Pages without extractable text (falsy result) are skipped.
    extracted_pages = (page.extract_text() for page in reader.pages)
    page_texts = [page_text for page_text in extracted_pages if page_text]

    # Join with a newline so the last word of one page is not glued to the
    # first word of the next page, which would corrupt the chunk contents.
    text = "\n".join(page_texts)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_text(text)

    # The collection is created even when there are no chunks, exactly as
    # before, so later searches still have a collection to query.
    create_collection()
    for i, chunk in enumerate(chunks):
        insert_document(chunk, i)
def _build_rag_prompt(context: str, query: str) -> str:
    """Return the instruction prompt embedding *context* and *query*."""
    return f"""
You are a professional AI assistant answering questions strictly using the given context.
Rules:
- Use ONLY the provided context.
- Do NOT repeat the full document.
- Be precise and useful.
- If the question asks for steps or a process, return a numbered list.
- If the question asks for a summary, provide a concise paragraph.
- If the question is factual, answer directly.
- Do NOT hallucinate.
Context:
{context}
Question:
{query}
Answer:
"""


def rag_answer(query: str) -> str:
    """Answer *query* from the passages returned by ``search_query``.

    The retrieved passages are joined with newlines and placed in a prompt
    whose rules ask for a numbered list for step/process questions, a
    concise paragraph for summaries, and a direct answer for factual
    questions. The prompt is then passed to ``refine_answer`` and its
    result is returned.

    When the search yields nothing, a fixed "no relevant information"
    message is returned and ``refine_answer`` is not called.
    """
    retrieved = search_query(query)
    if retrieved:
        joined_context = "\n".join(retrieved)
        return refine_answer(_build_rag_prompt(joined_context, query))
    return "No relevant information found in the document."