Spaces:
Sleeping
Sleeping
File size: 1,607 Bytes
8b4f950 d58d3d6 dba8fe6 6158a38 dba8fe6 d58d3d6 d5b1e9d d58d3d6 dba8fe6 d58d3d6 dba8fe6 d58d3d6 dba8fe6 d58d3d6 dba8fe6 d58d3d6 dba8fe6 d58d3d6 6158a38 d5b1e9d d749425 6158a38 d5b1e9d 6158a38 d749425 6158a38 d749425 d5b1e9d d749425 6158a38 d5b1e9d 6158a38 d5b1e9d 6158a38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader
from vectorstore import insert_document, search_query, create_collection
from llm_node import refine_answer
def load_pdf(pdf_path: str) -> None:
    """Load a PDF, split its text into chunks, and store embeddings in Qdrant.

    Args:
        pdf_path: Filesystem path to the PDF file to ingest.

    Side effects:
        (Re)creates the vector collection and inserts one document per chunk,
        keyed by the chunk's index.
    """
    reader = PdfReader(pdf_path)
    # Collect per-page text and join once: avoids quadratic += string
    # building, and the "\n" separator stops the last word of one page
    # from being glued to the first word of the next.
    pages = []
    for page in reader.pages:
        extracted = page.extract_text()
        if extracted:
            pages.append(extracted)
    text = "\n".join(pages)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    chunks = splitter.split_text(text)
    create_collection()
    for i, chunk in enumerate(chunks):
        insert_document(chunk, i)
def rag_answer(query: str) -> str:
    """Answer *query* from the ingested document via retrieval + LLM refinement.

    Retrieves matching chunks from the vector store; if any are found,
    assembles a context-restricted prompt (numbered steps for process
    questions, a paragraph for summaries, a direct reply for facts) and
    delegates to the LLM. Returns a fixed fallback message when nothing
    relevant is retrieved.
    """
    retrieved = search_query(query)
    if retrieved:
        snippets = "\n".join(retrieved)
        # Prompt constrains the model to the retrieved context only.
        instructions = f"""
You are a professional AI assistant answering questions strictly using the given context.
Rules:
- Use ONLY the provided context.
- Do NOT repeat the full document.
- Be precise and useful.
- If the question asks for steps or a process, return a numbered list.
- If the question asks for a summary, provide a concise paragraph.
- If the question is factual, answer directly.
- Do NOT hallucinate.
Context:
{snippets}
Question:
{query}
Answer:
"""
        return refine_answer(instructions)
    return "No relevant information found in the document."
|