# app.py
import os
import streamlit as st
import pandas as pd
import gdown

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# -------------------------
# 1) Config + Secrets
# -------------------------
st.set_page_config(page_title="Loan Processing RAG App", layout="centered")

# The key is supplied through the environment (HF Space Secrets). Both
# spellings are accepted because build_rag() also falls back to
# OPENAIAPIKEY; checking only one name here would stop the app before that
# fallback could ever be used.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")
if not OPENAI_API_KEY:
    st.error("Missing OPENAI_API_KEY. Please set it in Hugging Face Space → Settings → Secrets.")
    st.stop()

# Local folder that receives the downloaded PDFs and CSVs.
DATA_DIR = "datafile"
DB_DIR = "meddoc_db"  # persisted in the Space runtime filesystem

# Maps each local file name to the Google Drive file id it is downloaded from.
FILE_LIST = {
    "Bank Loan Interest Rate Policy.pdf": "1HBIz4oRl6JKkozjzH82ktmuo0k9XC6nI",
    "Bank Loan Overall Risk Policy.pdf": "1I2GleuGPrl14e9QFE6J7hLiWqf9uyrQo",
    "Customer Account Status.csv": "101J48wIK0LrmOCvY--Eoja5Z7wTRVavM",
    "Customer Credit Score.csv": "1-umr8CKLZHFjoqCY9rimhdBscg5NucLJ",
    "Government PR Status.csv": "1Em_Qg0v9moR108K1WgDMh-b00DebvRs7",
}

# -------------------------
# 2) Download data from GDrive
# -------------------------
def ensure_data_downloaded() -> None:
    """Download every file in FILE_LIST into DATA_DIR unless it already exists.

    Files already present on disk are left untouched, so repeated calls only
    fetch what is missing.

    Raises:
        RuntimeError: If a download completes without producing the file.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    for name, fid in FILE_LIST.items():
        out_path = os.path.join(DATA_DIR, name)
        if os.path.exists(out_path):
            continue
        url = f"https://drive.google.com/uc?id={fid}"
        result = gdown.download(url, out_path, quiet=True)
        # With quiet=True nothing is printed, and some gdown versions report
        # failure by returning None rather than raising. Fail here with the
        # file name instead of letting a later loader hit a missing file.
        if result is None or not os.path.exists(out_path):
            raise RuntimeError(
                f"Failed to download '{name}' from Google Drive (file id {fid})."
            )

# -------------------------
# 3) Build RAG resources once
# -------------------------
@st.cache_resource
def build_rag():
    """Build the retriever and chat model used by the RAG chain.

    Downloads the data files if needed, loads and splits the policy PDFs,
    embeds the chunks into a Chroma collection persisted under DB_DIR, and
    creates the chat model. Decorated with ``st.cache_resource`` so the
    resources are built once rather than on every script rerun.

    Returns:
        A ``(retriever, llm)`` tuple: a retriever over the Chroma collection
        that returns the top 2 chunks, and a ``gpt-4o-mini`` chat model.

    Raises:
        RuntimeError: If no OpenAI API key is configured, or if no text could
            be extracted from the PDFs in DATA_DIR.
    """
    import hashlib  # local import: only needed here to derive chunk ids

    api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")
    if not api_key:
        raise RuntimeError(
            "Missing OpenAI API key. Set it in HF Space Secrets as "
            "OPENAI_API_KEY (or OPENAIAPIKEY)."
        )

    ensure_data_downloaded()

    loader = DirectoryLoader(DATA_DIR, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)

    # Key every chunk by a hash of its text. The collection is persisted in
    # DB_DIR, so with random ids each cold start would append a second copy of
    # every chunk and the k=2 retriever could return the same passage twice.
    # Stable ids make the write an idempotent upsert; identical chunks within
    # one run collapse to a single entry (duplicate ids in one batch are
    # rejected by Chroma).
    texts_by_id = {}
    for chunk in chunks:
        text = chunk.page_content
        digest = hashlib.sha256(
            text.encode("utf-8", errors="backslashreplace")
        ).hexdigest()
        texts_by_id.setdefault(digest, text)

    if not texts_by_id:
        raise RuntimeError(
            f"No text could be extracted from the PDF files in '{DATA_DIR}'."
        )

    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=api_key)

    vectordb = Chroma.from_texts(
        list(texts_by_id.values()),
        embeddings,
        ids=list(texts_by_id),
        collection_name="meddoc",
        persist_directory=DB_DIR,
    )
    retriever = vectordb.as_retriever(search_kwargs={"k": 2})

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2, api_key=api_key)
    return retriever, llm

# Build the retriever and LLM; on any failure show the reason in the UI and
# halt this script run instead of continuing without them.
try:
    retriever, llm = build_rag()
except Exception as init_error:
    st.error(f"Failed to initialize RAG: {init_error}")
    st.stop()

# -------------------------
# 4) Load CSVs
# -------------------------
# Read the three customer tables in a fixed order: credit score, account
# status, PR status.
credit_df, status_df, pr_df = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in (
        "Customer Credit Score.csv",
        "Customer Account Status.csv",
        "Government PR Status.csv",
    )
)

def get_customer_details(customer_id: str) -> tuple:
    """Look up a customer's credit score, account status and PR status.

    Args:
        customer_id: Customer ID as typed by the user; surrounding whitespace
            is ignored.

    Returns:
        A ``(credit_score, account_status, pr_status)`` tuple. ``credit_score``
        is an ``int``. ``pr_status`` falls back to ``"Singaporean"`` when the
        ID has no row in the PR status table.

    Raises:
        ValueError: If the ID is not a whole number, or if it is missing from
            the credit score or account status table.
    """
    customer_id = customer_id.strip()
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as "²" that int() rejects, which would surface as a raw
    # "invalid literal for int()" message instead of the one below.
    if not customer_id.isdecimal():
        raise ValueError("Customer ID must be a number (e.g., 1001).")
    id_int = int(customer_id)

    credit_row = credit_df[credit_df["ID"] == id_int]
    status_row = status_df[status_df["ID"] == id_int]
    pr_row = pr_df[pr_df["ID"] == id_int]

    if credit_row.empty:
        raise ValueError(f"Customer ID {customer_id} not found in Credit Score table.")
    if status_row.empty:
        raise ValueError(f"Customer ID {customer_id} not found in Account Status table.")

    # If an ID matches several rows, the first match is used.
    credit_score = int(credit_row["Credit Score"].values[0])
    account_status = status_row["Account Status"].values[0]
    pr_status = pr_row["PR Status"].values[0] if not pr_row.empty else "Singaporean"
    return credit_score, account_status, pr_status

def format_docs(docs):
    """Join the ``page_content`` of each document, separated by blank lines."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

def build_question(customer_id):
    """Build the query sent to the RAG chain for one customer.

    Args:
        customer_id: Customer ID as typed by the user.

    Returns:
        A single-line string containing the customer ID, credit score,
        account status and PR status.

    Raises:
        ValueError: Propagated from ``get_customer_details`` when the ID is
            invalid or unknown.
    """
    # Normalize once so the ID embedded in the question is the same value
    # that was looked up, without stray whitespace from the input box.
    customer_id = customer_id.strip()
    credit, status, pr = get_customer_details(customer_id)
    return (
        f"Customer ID: {customer_id}, "
        f"Please provide a recommendation for Credit Score: {credit}, "
        f"Account Status: {status}, PR Status: {pr}"
    )

# -------------------------
# 5) Prompt template (sent to the LLM with {question} and {context} filled in)
# -------------------------
# Prompt text for the loan recommendation. It has two template variables,
# {question} (the customer query) and {context} (the retrieved policy text);
# {context} is referenced several times inside the instructions as well as in
# the final "Context:" section.
# NOTE(review): step 5 asks for a paragraph of "less than 60 words" while the
# Output Summary Format asks for "less than 30 words" — confirm which limit is
# intended.
# NOTE(review): the format lists "Not Applicable" as an Overall Risk value, but
# step 2 sets Overall Risk to "High" for non-PR applicants — confirm.
TEMPLATE = """
### Role
You are an experienced, detailed, caring, friendly, and warm Senior Loan Officer. Your primary task is to use the provided customer data and internal guidelines to generate a precise loan recommendation.

### MANDATORY INSTRUCTIONS

STRICTLY FOLLOW the steps below. The output must adhere exactly to the required Output Summary Format.

1.  Analyze Data & Context: Utilize the customer's profile data from {question} and the financial criteria/loan guidelines from the {context} (retrieved documents).
2.  Immediate Eligibility Check:
    If the PR Status is identified as Not Applicable, Foreigner, or any value indicating non-Permanent Resident status, STOP the analysis. Set the Overall Risk to "High" and the Interest Rate to "N/A." Overwrite the Recommendation with a friendly summary stating the applicant is Not Eligible for Foreigner without PR Status.
3.  Determine Overall Risk: Based on the Credit Score, Account Status, and {context} guidelines, determine the applicant's Overall Risk level (e.g., Low, Medium, High).
4.  Determine Interest Rate: Use the determined Overall Risk level and the specific interest rate matrix found within the {context} to assign the appropriate Interest Rate.
5.  Final Recommendation Summary: Summarize the entire decision (including the basis for the risk and rate) in a single, professional, and warm paragraph less than 60 words.

### Unavailability Protocol

If the necessary guidelines (risk tiers or interest rates) are missing from the {context} to complete steps 3 and 4, you must ONLY respond with the exact sentence: "I do not have the complete information for now and will need further review to provide an accurate recommendation."

### Output Summary Format

Produce the output summary using the exact structure below, filling in the bracketed placeholders:

Output Summary:
Customer ID: [Customer ID]
Credit Score: [Credit Score]
Account Status: [Account Status]
PR Status: [PR Status]
Overall Risk: [Low/Medium/High/Not Applicable]
Interest Rate: [e.g., 4.5% or N/A]
Recommendation: [Final summary less than 30 words]

Query:
{question}

Context:
{context}
"""

# Turn the raw template into a chat prompt with {context} and {question} slots.
rag_prompt = ChatPromptTemplate.from_template(TEMPLATE)

# The incoming question string feeds both slots: it is passed through
# unchanged as "question", and it drives retrieval, whose documents are
# flattened to plain text for "context".
_context_step = retriever | format_docs
_prompt_inputs = {"context": _context_step, "question": RunnablePassthrough()}

# question -> prompt inputs -> prompt -> chat model -> plain string.
chain = _prompt_inputs | rag_prompt | llm | StrOutputParser()

# -------------------------
# 6) Streamlit UI
# -------------------------
st.title("Loan Processing RAG App")
st.write("Enter a Customer ID to retrieve credit details and generate recommendations.")

customer_id = st.text_input("Enter Customer ID")
run_clicked = st.button("Run Analysis")

if run_clicked and not customer_id.strip():
    # Nothing but whitespace was entered.
    st.error("Please enter a valid Customer ID.")
elif run_clicked:
    # Any failure (bad or unknown ID, retrieval or model error) is shown to
    # the user as an error message rather than a traceback.
    try:
        question = build_question(customer_id)
        response = chain.invoke(question)
        st.success("Analysis completed.")
        st.subheader("Output Summary")
        st.text(response)
    except Exception as run_error:
        st.error(str(run_error))