"""Dual-layer RAG pipeline for Canadian Express Entry guidance.

Importing this module loads the environment, builds the embedding model,
opens the persisted FAISS index and wires two prompt chains:

* Layer 1 turns the raw retrieved chunks into a structured breakdown.
* Layer 2 turns that structured breakdown into the user-facing answer.

``agent_inference`` is the public entry point called by server.py; the two
``translate_to_*`` helpers are best-effort Sinhala/English translators.
"""

import logging
import os

from deep_translator import GoogleTranslator
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

__all__ = ["agent_inference", "translate_to_english", "translate_to_sinhala"]

logger = logging.getLogger(__name__)

load_dotenv()

# Fail fast at import time: without the key every LLM call below would fail.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is missing in your .env file")

# =============================================================================
# Embeddings
# =============================================================================
model_name = "BAAI/bge-base-en"
encode_kwargs = {"normalize_embeddings": True}

bge_embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={"device": "cpu"},
    encode_kwargs=encode_kwargs,
)

# =============================================================================
# Load FAISS vector store
# =============================================================================
persist_directory = "faiss_index"  # renamed from chroma_vector_db

# NOTE(security): allow_dangerous_deserialization opts in to pickle-based
# loading of the saved index. Only point persist_directory at an index this
# project built itself; never at a file obtained from an untrusted source.
vector_store = FAISS.load_local(
    persist_directory,
    bge_embeddings,
    allow_dangerous_deserialization=True,
)

retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# =============================================================================
# LLM
# =============================================================================
# The API key is not passed explicitly here; ChatGroq is expected to pick it
# up from the environment populated by load_dotenv() above.
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    temperature=0,
)

# =============================================================================
# LAYER 1 — Context Extraction Prompt
# =============================================================================
# This is the intermediate step.
# Its only job is to read the raw retrieved chunks and produce a clean,
# structured breakdown — it does NOT write the final answer yet.
# =============================================================================
layer1_extraction_prompt = PromptTemplate(
    template="""
You are a structured information extractor for Canadian immigration documents.

Your ONLY job is to read the retrieved text chunks below and extract
the relevant facts into a structured format. Do NOT write a conversational answer.
If the user is asking a general question about how the Express Entry system works, summarize all relevant information found across all categories below.

User question:
{question}

Retrieved document chunks:
{context}

Extract and organise the relevant information under these categories.
If a category has no relevant information, write "Not found in context."

ELIGIBILITY REQUIREMENTS:

PROCEDURAL STEPS:

REQUIRED DOCUMENTS:

KEY FACTS & DEFINITIONS:

IMPORTANT WARNINGS OR EXCEPTIONS:
""",
    input_variables=["question", "context"],
)

# =============================================================================
# LAYER 2 — Response Generation Prompt
# =============================================================================
# This takes the STRUCTURED OUTPUT from Layer 1 (not raw chunks) and
# writes the final, readable answer for the user.
# =============================================================================
layer2_generation_prompt = PromptTemplate(
    template="""
You are Setu, a bilingual immigration guidance assistant for Sri Lankan applicants applying to Canadian Express Entry.

{language_instruction}

Using ONLY the structured context provided below, write a clear, helpful, and accurate answer to the user's question. Do not invent any information.
If the structured context does not contain enough information to answer, say:
"I could not find specific information about this in my knowledge base."

User question:
{question}

Structured context (extracted from official documents):
{structured_context}

Rules:
- Answer in clear, plain language suitable for a non-legal reader.
- Use a step-by-step format if the question is procedural.
- Use short paragraphs if the question is informational.
- Do not add information that is not in the structured context above.
""",
    input_variables=["question", "structured_context", "language_instruction"],
)

# Chain each prompt with the LLM
layer1_chain = layer1_extraction_prompt | llm | StrOutputParser()
layer2_chain = layer2_generation_prompt | llm | StrOutputParser()


# =============================================================================
# Helper — format retrieved docs into plain text
# =============================================================================
def _format_docs(docs) -> str:
    """Render retrieved documents as one numbered plain-text block.

    Args:
        docs: Retrieved documents; each must expose ``metadata`` (a mapping)
            and ``page_content`` (a string).

    Returns:
        The documents joined by blank lines, each headed by its 1-based index
        and its ``source`` metadata ("Unknown source" when absent), or the
        literal "No retrieved context." when ``docs`` is empty.
    """
    if not docs:
        return "No retrieved context."

    return "\n\n".join(
        f"[Document {i}] Source: {doc.metadata.get('source', 'Unknown source')}\n"
        f"{doc.page_content.strip()}"
        for i, doc in enumerate(docs, start=1)
    )


# =============================================================================
# agent_inference — the public function called by server.py
# =============================================================================
def agent_inference(question: str, language: str = "en") -> str:
    """Answer ``question`` with the dual-layer RAG pipeline.

    Step 1 — Retrieve: fetch the top-k chunks from the FAISS vector store.
    Step 2 — Layer 1: send the raw chunks to the LLM with the extraction
    prompt to obtain a structured breakdown.
    Step 3 — Layer 2: send the STRUCTURED output (not the raw chunks) to the
    LLM with the generation prompt to obtain the final answer.

    Args:
        question: The user's question. Surrounding whitespace is ignored.
        language: ``"si"`` (any casing, surrounding whitespace ignored) asks
            for a Sinhala answer; any other value yields English.

    Returns:
        The final user-facing answer, or a fixed prompt for a valid question
        when ``question`` is empty or whitespace-only.
    """
    if not question or not question.strip():
        return "Please provide a valid question."

    # Use the same trimmed text for retrieval and for both prompts so stray
    # whitespace cannot skew the embedding query.
    question = question.strip()

    # Tolerate "SI" / " si " and non-string values; anything else is English.
    wants_sinhala = isinstance(language, str) and language.strip().lower() == "si"
    language_instruction = (
        "You MUST respond entirely in Sinhala language."
        if wants_sinhala
        else "Respond in English."
    )

    # ── Step 1: Retrieve ─────────────────────────────────────────────────────
    docs = retriever.invoke(question)
    raw_context = _format_docs(docs)

    # ── Step 2: Layer 1 — Extract & Structure ────────────────────────────────
    structured_context = layer1_chain.invoke(
        {
            "question": question,
            "context": raw_context,
        }
    )

    # ── Step 3: Layer 2 — Generate Final Answer ──────────────────────────────
    return layer2_chain.invoke(
        {
            "question": question,
            "structured_context": structured_context,
            "language_instruction": language_instruction,
        }
    )


# =============================================================================
# Translation helpers
# =============================================================================
def _translate(text: str, source: str, target: str) -> str:
    """Translate ``text`` from ``source`` to ``target``, best effort.

    Any failure is logged and the original ``text`` is returned unchanged so
    a translation outage never breaks the caller.
    """
    try:
        return GoogleTranslator(source=source, target=target).translate(text)
    except Exception as exc:  # deliberate best-effort boundary
        logger.warning("Translation error (%s→%s): %s", source, target, exc)
        return text


def translate_to_english(text: str) -> str:
    """Translate Sinhala ``text`` to English; return ``text`` as-is on failure."""
    return _translate(text, source="si", target="en")


def translate_to_sinhala(text: str) -> str:
    """Translate English ``text`` to Sinhala; return ``text`` as-is on failure."""
    return _translate(text, source="en", target="si")