# setuproject / sethu_agent.py
# Thisaraa13
# initial commit
# 1f14da1
# Raw
# History Blame Contribute Delete
# 7.67 kB
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq
from deep_translator import GoogleTranslator
# Pull variables from a local .env file into the process environment before
# anything below reads them.
load_dotenv()

# Fail fast at import time when the Groq credential is absent or empty.
# NOTE(review): the key is not passed to ChatGroq explicitly in the visible
# code — presumably the client reads GROQ_API_KEY from the environment; confirm.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY is missing in your .env file")
# =============================================================================
# Embedding model
# =============================================================================
# NOTE(review): presumably this has to be the same model the FAISS index was
# built with — verify against the indexing script.
model_name = "BAAI/bge-base-en"
# Ask the encoder to return normalized embedding vectors.
encode_kwargs = dict(normalize_embeddings=True)
bge_embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=dict(device="cpu"),  # run the encoder on CPU
    model_name=model_name,
)
# =============================================================================
# Load FAISS vector store
# =============================================================================
# Directory (relative to the working directory) that holds the saved index.
persist_directory = "faiss_index" # renamed from chroma_vector_db
# SECURITY NOTE: allow_dangerous_deserialization=True permits load_local to
# unpickle data stored alongside the index, which can execute arbitrary code.
# Only point persist_directory at an index this project produced itself —
# never at user-supplied or downloaded files.
vector_store = FAISS.load_local(
    persist_directory,
    bge_embeddings,
    allow_dangerous_deserialization=True,
)
# Retriever used by agent_inference; each query asks for k=5 chunks.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
# =============================================================================
# LLM
# =============================================================================
# A single chat model instance is shared by both prompt layers below.
# temperature=0 asks for the least random sampling the backend offers.
llm = ChatGroq(
    temperature=0,
    model="openai/gpt-oss-20b",
)
# =============================================================================
# LAYER 1 — Context Extraction Prompt
# =============================================================================
# Intermediate step: the model reads the raw retrieved chunks and produces a
# clean, structured breakdown. It does NOT write the final answer here.
# =============================================================================
# Template text kept in its own constant so the prompt wording can be read and
# reviewed separately from the PromptTemplate wiring.
_LAYER1_TEMPLATE = """
You are a structured information extractor for Canadian immigration documents.
Your ONLY job is to read the retrieved text chunks below and extract the
relevant facts into a structured format. Do NOT write a conversational answer.
If the user is asking a general question about how the Express Entry system works,
summarize all relevant information found across all categories below.
User question: {question}
Retrieved document chunks:
{context}
Extract and organise the relevant information under these categories.
If a category has no relevant information, write "Not found in context."
ELIGIBILITY REQUIREMENTS:
<list any eligibility rules, criteria, or conditions found>
PROCEDURAL STEPS:
<list any step-by-step process or procedure found>
REQUIRED DOCUMENTS:
<list any documents, certificates, or forms mentioned>
KEY FACTS & DEFINITIONS:
<list any important numbers, scores, timelines, or definitions>
IMPORTANT WARNINGS OR EXCEPTIONS:
<list any warnings, exceptions, or special conditions>
"""

# Placeholders filled at call time: {question} and {context}.
layer1_extraction_prompt = PromptTemplate(
    input_variables=["question", "context"],
    template=_LAYER1_TEMPLATE,
)
# =============================================================================
# LAYER 2 — Response Generation Prompt
# =============================================================================
# Consumes the STRUCTURED OUTPUT of Layer 1 (not the raw chunks) and writes
# the final, readable answer for the user.
# =============================================================================
# Template text kept in its own constant so the prompt wording can be read and
# reviewed separately from the PromptTemplate wiring.
_LAYER2_TEMPLATE = """
You are Setu, a bilingual immigration guidance assistant for Sri Lankan
applicants applying to Canadian Express Entry.
{language_instruction}
Using ONLY the structured context provided below, write a clear, helpful,
and accurate answer to the user's question. Do not invent any information.
If the structured context does not contain enough information to answer,
say: "I could not find specific information about this in my knowledge base."
User question: {question}
Structured context (extracted from official documents):
{structured_context}
Rules:
- Answer in clear, plain language suitable for a non-legal reader.
- Use a step-by-step format if the question is procedural.
- Use short paragraphs if the question is informational.
- Do not add information that is not in the structured context above.
"""

# Placeholders filled at call time: {question}, {structured_context} and
# {language_instruction}.
layer2_generation_prompt = PromptTemplate(
    input_variables=["question", "structured_context", "language_instruction"],
    template=_LAYER2_TEMPLATE,
)
def _as_text_chain(prompt):
    """Pipe *prompt* into the shared LLM and parse the reply into a plain string."""
    return prompt | llm | StrOutputParser()


# One runnable per layer: prompt -> llm -> string output.
layer1_chain = _as_text_chain(layer1_extraction_prompt)
layer2_chain = _as_text_chain(layer2_generation_prompt)
# =============================================================================
# Helper β€” format retrieved docs into plain text
# =============================================================================
def _format_docs(docs) -> str:
if not docs:
return "No retrieved context."
chunks = []
for i, doc in enumerate(docs, start=1):
source = doc.metadata.get("source", "Unknown source")
content = doc.page_content.strip()
chunks.append(f"[Document {i}] Source: {source}\n{content}")
return "\n\n".join(chunks)
# =============================================================================
# agent_inference β€” the public function called by server.py
# =============================================================================
def agent_inference(question: str, language: str = "en") -> str:
"""
Dual-layer RAG pipeline:
Step 1 β€” Retrieve
Fetch the top-k relevant chunks from the FAISS vector store.
Step 2 β€” Layer 1: Context Extraction
Send the raw chunks to the LLM with a strict extraction prompt.
Output: structured categories (eligibility, procedures, documents…)
Step 3 β€” Layer 2: Response Generation
Send the STRUCTURED output (not the raw chunks) to the LLM with a
generation prompt. Output: the final user-facing answer.
"""
if not question or not question.strip():
return "Please provide a valid question."
# ── Step 1: Retrieve ─────────────────────────────────────────────────────
docs = retriever.invoke(question)
raw_context = _format_docs(docs)
# ── Step 2: Layer 1 β€” Extract & Structure ────────────────────────────────
structured_context = layer1_chain.invoke(
{
"question": question,
"context": raw_context,
}
)
# ── Step 3: Layer 2 β€” Generate Final Answer ──────────────────────────────
final_answer = layer2_chain.invoke(
{
"question": question,
"structured_context": structured_context,
"language_instruction": "You MUST respond entirely in Sinhala language." if language == "si" else "Respond in English.",
}
)
return final_answer
# Public API of this module. The two translate_* functions are defined further
# down the file; listing them here first is fine because __all__ holds names
# (strings), not the objects themselves.
__all__ = ["agent_inference", "translate_to_english", "translate_to_sinhala"]
def translate_to_english(text: str) -> str:
    """Translate Sinhala *text* to English on a best-effort basis.

    Args:
        text: The Sinhala text to translate.

    Returns:
        The translator's output, or *text* unchanged when the translation
        attempt raises any exception (the error is printed to stdout).
    """
    try:
        translator = GoogleTranslator(source="si", target="en")
        translated = translator.translate(text)
    except Exception as err:
        # Deliberate best-effort: report the problem and hand back the input
        # so the caller still has something to show.
        print(f"Translation error (si→en): {err}")
        return text
    return translated
def translate_to_sinhala(text: str) -> str:
    """Translate English *text* to Sinhala on a best-effort basis.

    Args:
        text: The English text to translate.

    Returns:
        The translator's output, or *text* unchanged when the translation
        attempt raises any exception (the error is printed to stdout).
    """
    try:
        translator = GoogleTranslator(source="en", target="si")
        translated = translator.translate(text)
    except Exception as err:
        # Deliberate best-effort: report the problem and hand back the input
        # so the caller still has something to show.
        print(f"Translation error (en→si): {err}")
        return text
    return translated