import os
from dotenv import load_dotenv

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq
from deep_translator import GoogleTranslator

# Populate the process environment from a local .env file before any
# setting is read from it.
load_dotenv()

# Fail fast at import time when the Groq credential is absent or empty.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY is missing in your .env file")

# =============================================================================
# Embeddings
# =============================================================================
# Embedding model handed to the FAISS store below. The encoder is asked for
# normalised vectors and is pinned to the CPU.
model_name = "BAAI/bge-base-en"
encode_kwargs = dict(normalize_embeddings=True)

bge_embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=dict(device="cpu"),
    model_name=model_name,
)

# =============================================================================
# Load FAISS vector store
# =============================================================================
# Folder holding the saved FAISS index. The path is relative, so it is
# resolved against the working directory of the running process.
persist_directory = "faiss_index"

# SECURITY NOTE: allow_dangerous_deserialization=True opts in to loading
# pickled data from persist_directory. Only point this at an index produced
# by a trusted build step; never load an index obtained from an untrusted
# source.
vector_store = FAISS.load_local(
    folder_path=persist_directory,
    embeddings=bge_embeddings,
    allow_dangerous_deserialization=True,
)

# Every query returns the 5 best-matching chunks.
retriever = vector_store.as_retriever(search_kwargs=dict(k=5))

# =============================================================================
# LLM
# =============================================================================
# Groq-hosted chat model. Temperature 0 requests the least random decoding
# the backend offers.
llm = ChatGroq(temperature=0, model="openai/gpt-oss-20b")

# =============================================================================
# LAYER 1 — Context Extraction Prompt
# =============================================================================
# Intermediate step of the pipeline. The prompt receives the user question
# and the raw retrieved chunks, and asks the model for a structured breakdown
# under five fixed headings. It explicitly forbids a conversational answer;
# writing the final answer is left to Layer 2.
# =============================================================================
_LAYER1_TEMPLATE = """
You are a structured information extractor for Canadian immigration documents.
Your ONLY job is to read the retrieved text chunks below and extract the
relevant facts into a structured format. Do NOT write a conversational answer.

If the user is asking a general question about how the Express Entry system works, 
summarize all relevant information found across all categories below.

User question: {question}

Retrieved document chunks:
{context}

Extract and organise the relevant information under these categories.
If a category has no relevant information, write "Not found in context."

ELIGIBILITY REQUIREMENTS:
<list any eligibility rules, criteria, or conditions found>

PROCEDURAL STEPS:
<list any step-by-step process or procedure found>

REQUIRED DOCUMENTS:
<list any documents, certificates, or forms mentioned>

KEY FACTS & DEFINITIONS:
<list any important numbers, scores, timelines, or definitions>

IMPORTANT WARNINGS OR EXCEPTIONS:
<list any warnings, exceptions, or special conditions>
"""

layer1_extraction_prompt = PromptTemplate(
    input_variables=["question", "context"],
    template=_LAYER1_TEMPLATE,
)

# =============================================================================
# LAYER 2 — Response Generation Prompt
# =============================================================================
# Final step of the pipeline. The prompt receives the user question, the
# structured breakdown produced by Layer 1 (never the raw chunks), and a
# one-line instruction naming the answer language. It asks for a plain-
# language answer built only from that structured context.
# =============================================================================
_LAYER2_TEMPLATE = """
You are Setu, a bilingual immigration guidance assistant for Sri Lankan
applicants applying to Canadian Express Entry.

{language_instruction}

Using ONLY the structured context provided below, write a clear, helpful,
and accurate answer to the user's question. Do not invent any information.
If the structured context does not contain enough information to answer,
say: "I could not find specific information about this in my knowledge base."

User question: {question}

Structured context (extracted from official documents):
{structured_context}

Rules:
- Answer in clear, plain language suitable for a non-legal reader.
- Use a step-by-step format if the question is procedural.
- Use short paragraphs if the question is informational.
- Do not add information that is not in the structured context above.
"""

layer2_generation_prompt = PromptTemplate(
    input_variables=["question", "structured_context", "language_instruction"],
    template=_LAYER2_TEMPLATE,
)

# Compose each prompt with the shared LLM and a parser that reduces the
# model reply to a plain string: prompt -> llm -> str.
layer1_chain = layer1_extraction_prompt.pipe(llm, StrOutputParser())
layer2_chain = layer2_generation_prompt.pipe(llm, StrOutputParser())


# =============================================================================
# Helper — format retrieved docs into plain text
# =============================================================================
def _format_docs(docs) -> str:
    if not docs:
        return "No retrieved context."
    chunks = []
    for i, doc in enumerate(docs, start=1):
        source = doc.metadata.get("source", "Unknown source")
        content = doc.page_content.strip()
        chunks.append(f"[Document {i}] Source: {source}\n{content}")
    return "\n\n".join(chunks)


# =============================================================================
# agent_inference — the public function called by server.py
# =============================================================================
def agent_inference(question: str, language: str = "en") -> str:
    """Answer a question with the dual-layer RAG pipeline.

    The pipeline runs three steps:

    1. Retrieve: ``retriever`` fetches the top-k chunks for the question and
       ``_format_docs`` flattens them into labelled text.
    2. Layer 1 (extraction): ``layer1_chain`` turns the raw chunks into a
       structured breakdown (eligibility, procedure, documents, ...).
    3. Layer 2 (generation): ``layer2_chain`` writes the user-facing answer
       from the structured breakdown, not from the raw chunks.

    Args:
        question: The user's question.
        language: Language code for the answer. ``"si"`` selects Sinhala.
            The code is matched case-insensitively, ignoring surrounding
            whitespace and any region suffix, so ``"SI"``, ``" si "``,
            ``"si-LK"`` and ``"si_LK"`` also select Sinhala. Any other value
            selects English.

    Returns:
        The generated answer, or the fixed message
        "Please provide a valid question." when ``question`` is empty or
        contains only whitespace (nothing is retrieved or generated then).
    """
    if not question or not question.strip():
        return "Please provide a valid question."

    # Step 1: retrieve the raw chunks and flatten them into labelled text.
    docs = retriever.invoke(question)
    raw_context = _format_docs(docs)

    # Step 2 (Layer 1): extract a structured breakdown from the raw chunks.
    structured_context = layer1_chain.invoke(
        {
            "question": question,
            "context": raw_context,
        }
    )

    # Step 3 (Layer 2): write the final answer from the structured breakdown.
    return layer2_chain.invoke(
        {
            "question": question,
            "structured_context": structured_context,
            "language_instruction": _language_instruction(language),
        }
    )


def _language_instruction(language: str) -> str:
    """Return the Layer 2 instruction line for the requested answer language."""
    # Reduce the code to its primary subtag so that spellings such as "SI",
    # " si " or "si-LK" are treated exactly like "si" instead of silently
    # falling back to English. Non-string values select English.
    if not isinstance(language, str):
        return "Respond in English."
    primary_subtag = language.strip().lower().replace("_", "-").split("-", 1)[0]
    if primary_subtag == "si":
        return "You MUST respond entirely in Sinhala language."
    return "Respond in English."


# Public names exported by a star-import of this module.
__all__ = [
    "agent_inference",
    "translate_to_english",
    "translate_to_sinhala",
]


def translate_to_english(text: str) -> str:
    """Translate Sinhala text to English, best effort.

    Args:
        text: The Sinhala text to translate.

    Returns:
        The translation produced by ``GoogleTranslator``. If anything goes
        wrong, the error is printed and ``text`` is returned unchanged.
    """
    try:
        translator = GoogleTranslator(source="si", target="en")
        translated = translator.translate(text)
    except Exception as err:
        # Deliberate best effort: report the failure and hand back the input.
        print(f"Translation error (si→en): {err}")
        return text
    return translated

def translate_to_sinhala(text: str) -> str:
    """Translate English text to Sinhala, best effort.

    Args:
        text: The English text to translate.

    Returns:
        The translation produced by ``GoogleTranslator``. If anything goes
        wrong, the error is printed and ``text`` is returned unchanged.
    """
    try:
        translator = GoogleTranslator(source="en", target="si")
        translated = translator.translate(text)
    except Exception as err:
        # Deliberate best effort: report the failure and hand back the input.
        print(f"Translation error (en→si): {err}")
        return text
    return translated