Spaces:
Sleeping
Sleeping
File size: 1,607 Bytes
8b4f950 d58d3d6 dba8fe6 6158a38 dba8fe6 d58d3d6 d5b1e9d d58d3d6 dba8fe6 d58d3d6 dba8fe6 d58d3d6 dba8fe6 d58d3d6 dba8fe6 d58d3d6 dba8fe6 d58d3d6 6158a38 d5b1e9d d749425 6158a38 d5b1e9d 6158a38 d749425 6158a38 d749425 d5b1e9d d749425 6158a38 d5b1e9d 6158a38 d5b1e9d 6158a38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader
from vectorstore import insert_document, search_query, create_collection
from llm_node import refine_answer
def load_pdf(pdf_path: str) -> None:
    """Load a PDF, split its text into chunks, and store embeddings in Qdrant.

    Args:
        pdf_path: Filesystem path to the PDF file to ingest.

    Side effects:
        (Re)creates the vector collection and inserts one document per chunk,
        keyed by the chunk's index.
    """
    reader = PdfReader(pdf_path)
    # Collect per-page text and join once: avoids quadratic += string
    # building, and the "\n" separator stops the last word of one page
    # from being glued to the first word of the next.
    pages = []
    for page in reader.pages:
        extracted = page.extract_text()
        if extracted:
            pages.append(extracted)
    text = "\n".join(pages)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    chunks = splitter.split_text(text)
    create_collection()
    for i, chunk in enumerate(chunks):
        insert_document(chunk, i)
def rag_answer(query: str) -> str:
    """Answer *query* from the ingested document via retrieval + LLM refinement.

    Retrieves matching chunks from the vector store; if any are found,
    assembles a context-restricted prompt (numbered steps for process
    questions, a paragraph for summaries, a direct reply for facts) and
    delegates to the LLM. Returns a fixed fallback message when nothing
    relevant is retrieved.
    """
    retrieved = search_query(query)
    if retrieved:
        snippets = "\n".join(retrieved)
        # Prompt constrains the model to the retrieved context only.
        instructions = f"""
You are a professional AI assistant answering questions strictly using the given context.
Rules:
- Use ONLY the provided context.
- Do NOT repeat the full document.
- Be precise and useful.
- If the question asks for steps or a process, return a numbered list.
- If the question asks for a summary, provide a concise paragraph.
- If the question is factual, answer directly.
- Do NOT hallucinate.
Context:
{snippets}
Question:
{query}
Answer:
"""
        return refine_answer(instructions)
    return "No relevant information found in the document."
|