Spaces:

tech5
/

docu-backend

Running

File size: 2,740 Bytes

e27c97c

import os
from typing import List

from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

from .smart_chunking import get_chunked_docs

# Location where store_documents() saves the FAISS index and load_documents()
# reads it back. It is a relative path, so it resolves against the process's
# current working directory.
VECTOR_PATH = "vectorstore/faiss_index"

# Chat model that get_rag_chain() places as the final step of the chain.
# NOTE(review): constructed at import time; presumably ChatGroq picks up its
# API key from the environment (GROQ_API_KEY) — confirm deployment config.
llm = ChatGroq(model="llama-3.3-70b-versatile")
# Embedding model, also constructed at import time.
# NOTE(review): not referenced by the functions visible in this module;
# presumably callers import it and pass it as `embedding_model` to
# store_documents()/load_documents() — verify against callers.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


# This function combines page_content with metadata for better retrieval
def format_docs_with_metadata(docs):
    """Render documents as citation-prefixed text blocks.

    Each document becomes a ``(Page <page>[, <ref>])`` line followed by its
    ``page_content``; the rendered documents are separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping and a
            ``page_content`` attribute.

    Returns:
        A single string containing every rendered document, or an empty
        string when ``docs`` is empty.
    """

    def _render(doc):
        # Build the citation from the pieces that are present; the page part
        # is always emitted, the reference only when it is truthy.
        info = doc.metadata
        pieces = [f"Page {info.get('page')}"]
        ref = info.get("ref")
        if ref:
            pieces.append(f"{ref}")
        return f"({', '.join(pieces)})\n{doc.page_content}"

    return "\n\n".join(_render(doc) for doc in docs)


# Function for storing documents in the vector database
def store_documents(docs: List[Document], embedding_model: Embeddings):
    """Embed ``docs`` into a new FAISS index and persist it under ``VECTOR_PATH``.

    Args:
        docs: Documents to index. Must contain at least one item.
        embedding_model: Embeddings object (an instance, not a model-name
            string) handed to ``FAISS.from_documents`` to vectorise the
            documents.

    Returns:
        The newly built FAISS vector store.

    Raises:
        ValueError: If ``docs`` is empty.
    """
    # The FAISS wrapper derives the index dimension from the first embedding
    # vector, so an empty batch would otherwise fail deep inside the library
    # with an unhelpful error. Fail early with an actionable message instead.
    if not docs:
        raise ValueError("No documents to store, upload a document with extractable text")
    vectorstore = FAISS.from_documents(docs, embedding=embedding_model)
    vectorstore.save_local(VECTOR_PATH)
    return vectorstore

# Function to load the vector database for the retrieval process
def load_documents(embedding_model: Embeddings):
    """Load the FAISS index stored under ``VECTOR_PATH``.

    Args:
        embedding_model: Embeddings object (an instance, not a model-name
            string) passed to ``FAISS.load_local``.
            NOTE(review): presumably this must be the same model that was
            used when the index was stored — confirm with callers.

    Returns:
        The loaded FAISS vector store.

    Raises:
        ValueError: If nothing exists at ``VECTOR_PATH``.
    """
    if not os.path.exists(VECTOR_PATH):
        raise ValueError("Vectorstore not found,Upload Your Document First")
    # SECURITY: allow_dangerous_deserialization=True lets FAISS unpickle the
    # stored docstore, which can execute arbitrary code. This is acceptable
    # only while VECTOR_PATH is written exclusively by this application;
    # never point it at files obtained from an untrusted source.
    return FAISS.load_local(
        VECTOR_PATH,
        embeddings=embedding_model,
        allow_dangerous_deserialization=True,
    )
    

# Prompt template sent to the LLM for every question.
# It has two placeholders, both supplied by get_rag_chain():
#   {context} - the retrieved excerpts rendered by format_docs_with_metadata()
#   {input}   - the value the chain is invoked with (the user's question)
# NOTE(review): the "highlighted with blue" citation instruction presumably
# relies on how the client renders the answer — confirm with the frontend.
prompt = ChatPromptTemplate.from_template(
    """You are a professional research analyst.

Answer the question strictly using the information contained in the document excerpts below.
Do not mention the phrases "provided context", "given context", or similar meta-references.
Do not include conversational language or assumptions.

Writing guidelines:
- Use a formal, neutral, and analytical tone.
- Present information directly and concisely.
- If information is missing, clearly state that it is not available in the document.
- Do not speculate or add external knowledge.

Citation rules:
- List citations in a separate section highlighted with blue.
- Each citation must include page number and table/figure/image reference if available.
- Use this format exactly:
  • Page X, Table/Figure/Image Y (if applicable)

<Document Excerpts>
{context}
</Document Excerpts>

Question:
{input}
"""
)

# Build the retrieval-augmented generation (RAG) chain
def get_rag_chain(retriever):
    """Compose the question-answering chain around ``retriever``.

    The chain's input is fanned out to two prompt variables: ``context``
    (the input piped through ``retriever`` and then
    ``format_docs_with_metadata``) and ``input`` (the input passed through
    unchanged). The filled prompt is then piped into the module-level ``llm``.

    Args:
        retriever: Runnable retriever that supports being piped with ``|``.

    Returns:
        The composed runnable chain.
    """
    # Retrieved documents are turned into citation-prefixed text.
    context_branch = retriever | format_docs_with_metadata
    prompt_inputs = {
        "context": context_branch,
        "input": RunnablePassthrough(),
    }
    return prompt_inputs | prompt | llm