Spaces:

Thisara13
/

setuproject

Sleeping

setuproject / sethu_agent.py

Thisaraa13

initial commit

1f14da1 about 2 months ago

7.67 kB

	import os
	from dotenv import load_dotenv

	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain_core.prompts import PromptTemplate
	from langchain_core.output_parsers import StrOutputParser
	from langchain_groq import ChatGroq
	from deep_translator import GoogleTranslator

	# Load variables from a local .env file (when present) before reading them.
	load_dotenv()

	# Abort at import time when the Groq credential is unset or empty.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	if GROQ_API_KEY is None or GROQ_API_KEY == "":
	    raise ValueError("GROQ_API_KEY is missing in your .env file")

	# =============================================================================
	# Embeddings
	# =============================================================================
	# Embedding model identifier and encoder options (embeddings are requested
	# in normalised form).
	model_name = "BAAI/bge-base-en"
	encode_kwargs = dict(normalize_embeddings=True)

	# Embedding wrapper shared by the vector store; the model is placed on CPU.
	bge_embeddings = HuggingFaceEmbeddings(
	    model_name=model_name,
	    model_kwargs=dict(device="cpu"),
	    encode_kwargs=encode_kwargs,
	)

	# =============================================================================
	# Load FAISS vector store
	# =============================================================================
	# Relative path of the saved FAISS index directory.
	persist_directory = "faiss_index" # renamed from chroma_vector_db

	# NOTE(security): allow_dangerous_deserialization=True lets load_local
	# unpickle the stored index data, so this directory must only ever contain
	# an index that was built by a trusted process.
	vector_store = FAISS.load_local(
	    folder_path=persist_directory,
	    embeddings=bge_embeddings,
	    allow_dangerous_deserialization=True,
	)

	# Retriever configured to return 5 chunks per query.
	retriever = vector_store.as_retriever(search_kwargs=dict(k=5))

	# =============================================================================
	# LLM
	# =============================================================================
	# Single chat model instance shared by both prompt layers; temperature is
	# pinned to 0.
	llm = ChatGroq(model="openai/gpt-oss-20b", temperature=0)

	# =============================================================================
	# LAYER 1 — Context Extraction Prompt
	# =============================================================================
	# This is the NEW intermediate step.
	# Its only job is to read the raw retrieved chunks and produce a clean,
	# structured breakdown — it does NOT write the final answer yet.
	# =============================================================================
	# Template text for the extraction layer. Placeholders: {question}, {context}.
	_LAYER1_TEMPLATE = """
	You are a structured information extractor for Canadian immigration documents.
	Your ONLY job is to read the retrieved text chunks below and extract the
	relevant facts into a structured format. Do NOT write a conversational answer.

	If the user is asking a general question about how the Express Entry system works,
	summarize all relevant information found across all categories below.

	User question: {question}

	Retrieved document chunks:
	{context}

	Extract and organise the relevant information under these categories.
	If a category has no relevant information, write "Not found in context."

	ELIGIBILITY REQUIREMENTS:
	<list any eligibility rules, criteria, or conditions found>

	PROCEDURAL STEPS:
	<list any step-by-step process or procedure found>

	REQUIRED DOCUMENTS:
	<list any documents, certificates, or forms mentioned>

	KEY FACTS & DEFINITIONS:
	<list any important numbers, scores, timelines, or definitions>

	IMPORTANT WARNINGS OR EXCEPTIONS:
	<list any warnings, exceptions, or special conditions>
	"""

	# Prompt object for Layer 1: turns raw retrieved chunks into categorised facts.
	layer1_extraction_prompt = PromptTemplate(
	    input_variables=["question", "context"],
	    template=_LAYER1_TEMPLATE,
	)

	# =============================================================================
	# LAYER 2 — Response Generation Prompt
	# =============================================================================
	# This takes the STRUCTURED OUTPUT from Layer 1 (not raw chunks) and
	# writes the final, readable answer for the user.
	# =============================================================================
	# Template text for the answer-writing layer. Placeholders:
	# {language_instruction}, {question}, {structured_context}.
	_LAYER2_TEMPLATE = """
	You are Setu, a bilingual immigration guidance assistant for Sri Lankan
	applicants applying to Canadian Express Entry.

	{language_instruction}

	Using ONLY the structured context provided below, write a clear, helpful,
	and accurate answer to the user's question. Do not invent any information.
	If the structured context does not contain enough information to answer,
	say: "I could not find specific information about this in my knowledge base."

	User question: {question}

	Structured context (extracted from official documents):
	{structured_context}

	Rules:
	- Answer in clear, plain language suitable for a non-legal reader.
	- Use a step-by-step format if the question is procedural.
	- Use short paragraphs if the question is informational.
	- Do not add information that is not in the structured context above.
	"""

	# Prompt object for Layer 2: writes the user-facing answer from Layer 1 output.
	layer2_generation_prompt = PromptTemplate(
	    input_variables=["question", "structured_context", "language_instruction"],
	    template=_LAYER2_TEMPLATE,
	)

	# Chain each prompt with the LLM.
	# The pipe operator must be a bare `|`; an escaped `\|` is a syntax error in
	# Python. Each chain runs prompt -> llm -> StrOutputParser.
	layer1_chain = layer1_extraction_prompt | llm | StrOutputParser()
	layer2_chain = layer2_generation_prompt | llm | StrOutputParser()


	# =============================================================================
	# Helper — format retrieved docs into plain text
	# =============================================================================
	def _format_docs(docs) -> str:
	    """Render retrieved documents as a single plain-text context string.

	    Each document becomes a ``[Document i] Source: <source>`` header line
	    (numbered from 1; "Unknown source" when the metadata has no ``source``
	    key) followed by its whitespace-stripped page content. Entries are
	    separated by one blank line.

	    Args:
	        docs: Sequence of objects exposing ``metadata`` (a mapping) and
	            ``page_content`` (a string).

	    Returns:
	        The joined text, or "No retrieved context." when ``docs`` is falsy.
	    """
	    if docs:
	        return "\n\n".join(
	            f"[Document {position}] Source: "
	            f"{document.metadata.get('source', 'Unknown source')}\n"
	            f"{document.page_content.strip()}"
	            for position, document in enumerate(docs, start=1)
	        )
	    return "No retrieved context."


	# =============================================================================
	# agent_inference — the public function called by server.py
	# =============================================================================
	def agent_inference(question: str, language: str = "en") -> str:
	    """Answer a question with the dual-layer RAG pipeline.

	    Pipeline:
	        1. Retrieve: fetch relevant chunks through the module-level
	           ``retriever``.
	        2. Layer 1 (context extraction): send the raw chunks to the LLM with
	           the extraction prompt to get categorised facts.
	        3. Layer 2 (response generation): send that structured output (not
	           the raw chunks) to the LLM with the generation prompt.

	    Args:
	        question: The user's question.
	        language: Response language code. ``"si"`` requests a Sinhala answer;
	            the match ignores letter case and surrounding whitespace and also
	            accepts regional variants such as ``"si-LK"`` / ``"si_LK"``.
	            Any other value (including ``None``) yields English.

	    Returns:
	        The final user-facing answer, or the fixed text
	        "Please provide a valid question." when ``question`` is empty or
	        whitespace-only.
	    """
	    if not question or not question.strip():
	        return "Please provide a valid question."

	    # Normalise the language code first: an exact comparison against "si"
	    # silently fell back to English for inputs like "SI" or "si-LK".
	    language_code = str(language or "en").strip().lower().replace("_", "-")
	    wants_sinhala = language_code == "si" or language_code.startswith("si-")
	    if wants_sinhala:
	        language_instruction = "You MUST respond entirely in Sinhala language."
	    else:
	        language_instruction = "Respond in English."

	    # Step 1: retrieve chunks and flatten them into plain text.
	    docs = retriever.invoke(question)
	    raw_context = _format_docs(docs)

	    # Step 2 (Layer 1): extract and structure the facts found in the chunks.
	    structured_context = layer1_chain.invoke(
	        {
	            "question": question,
	            "context": raw_context,
	        }
	    )

	    # Step 3 (Layer 2): write the final answer from the structured facts only.
	    final_answer = layer2_chain.invoke(
	        {
	            "question": question,
	            "structured_context": structured_context,
	            "language_instruction": language_instruction,
	        }
	    )

	    return final_answer


	# Names exported by `from <module> import *`.
	__all__ = [
	    "agent_inference",
	    "translate_to_english",
	    "translate_to_sinhala",
	]


	def translate_to_english(text: str) -> str:
	    """Translate Sinhala text to English on a best-effort basis.

	    Args:
	        text: Sinhala text to translate.

	    Returns:
	        The English translation. ``text`` is returned unchanged when it is
	        not a non-blank string, when the translator raises, or when the
	        translator produces no result.
	    """
	    # Nothing to translate: skip the translator call and the error it would log.
	    if not isinstance(text, str) or not text.strip():
	        return text
	    try:
	        translated = GoogleTranslator(source="si", target="en").translate(text)
	    except Exception as e:  # best-effort boundary: translation must not break a reply
	        print(f"Translation error (si→en): {e}")
	        return text
	    # NOTE(review): the translator appears able to return None/"" for some
	    # inputs; fall back to the original so the declared str return type holds.
	    return translated or text

	def translate_to_sinhala(text: str) -> str:
	    """Translate English text to Sinhala on a best-effort basis.

	    Args:
	        text: English text to translate.

	    Returns:
	        The Sinhala translation. ``text`` is returned unchanged when it is
	        not a non-blank string, when the translator raises, or when the
	        translator produces no result.
	    """
	    # Nothing to translate: skip the translator call and the error it would log.
	    if not isinstance(text, str) or not text.strip():
	        return text
	    try:
	        translated = GoogleTranslator(source="en", target="si").translate(text)
	    except Exception as e:  # best-effort boundary: translation must not break a reply
	        print(f"Translation error (en→si): {e}")
	        return text
	    # NOTE(review): the translator appears able to return None/"" for some
	    # inputs; fall back to the original so the declared str return type holds.
	    return translated or text