# app.py
"""Streamlit loan-processing RAG app.

Downloads policy PDFs and customer CSVs from Google Drive, indexes the PDFs in
a Chroma vector store, and asks an OpenAI chat model for a loan recommendation
based on a customer's credit score, account status and PR status.
"""
import hashlib
import os

import gdown
import pandas as pd
import streamlit as st
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# -------------------------
# 1) Config + Secrets
# -------------------------
st.set_page_config(page_title="Loan Processing RAG App", layout="centered")


def _resolve_api_key():
    """Return the OpenAI API key from the environment, or None if unset.

    ``OPENAI_API_KEY`` is the canonical secret name; ``OPENAIAPIKEY`` is
    accepted as a legacy fallback so both checks in this file agree.
    """
    return os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")


OPENAI_API_KEY = _resolve_api_key()  # set in HF Space Secrets
if not OPENAI_API_KEY:
    st.error(
        "Missing OPENAI_API_KEY. Please set it in Hugging Face Space → Settings → Secrets."
    )
    st.stop()

DATA_DIR = "datafile"
DB_DIR = "meddoc_db"  # persisted in the Space runtime filesystem

# Maps each local file name to its Google Drive file id.
FILE_LIST = {
    "Bank Loan Interest Rate Policy.pdf": "1HBIz4oRl6JKkozjzH82ktmuo0k9XC6nI",
    "Bank Loan Overall Risk Policy.pdf": "1I2GleuGPrl14e9QFE6J7hLiWqf9uyrQo",
    "Customer Account Status.csv": "101J48wIK0LrmOCvY--Eoja5Z7wTRVavM",
    "Customer Credit Score.csv": "1-umr8CKLZHFjoqCY9rimhdBscg5NucLJ",
    "Government PR Status.csv": "1Em_Qg0v9moR108K1WgDMh-b00DebvRs7",
}


# -------------------------
# 2) Download data from GDrive
# -------------------------
def ensure_data_downloaded():
    """Download every file in FILE_LIST into DATA_DIR unless already present.

    Raises:
        RuntimeError: if a file is still missing after its download attempt.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    for name, fid in FILE_LIST.items():
        out_path = os.path.join(DATA_DIR, name)
        if os.path.exists(out_path):
            continue
        url = f"https://drive.google.com/uc?id={fid}"
        gdown.download(url, out_path, quiet=True)
        # Verify the result explicitly so a failed download is reported here
        # rather than as a FileNotFoundError much later when the file is read.
        if not os.path.exists(out_path):
            raise RuntimeError(f"Failed to download '{name}' from Google Drive.")


# -------------------------
# 3) Build RAG resources once
# -------------------------
@st.cache_resource
def build_rag():
    """Build the retriever and chat model; cached for the process lifetime.

    Returns:
        A ``(retriever, llm)`` tuple.

    Raises:
        RuntimeError: if the API key is missing, a data file cannot be
            downloaded, or no text could be extracted from the policy PDFs.
    """
    api_key = _resolve_api_key()
    if not api_key:
        raise RuntimeError(
            "Missing OpenAI API key. Set it in HF Space Secrets as OPENAI_API_KEY."
        )

    ensure_data_downloaded()

    loader = DirectoryLoader(DATA_DIR, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)

    # Drop exact-duplicate chunk texts (order preserved): the ids below are
    # derived from the text and must be unique within one insert.
    texts = list(dict.fromkeys(c.page_content for c in chunks))
    if not texts:
        raise RuntimeError(f"No text could be extracted from the PDFs in '{DATA_DIR}'.")

    # Deterministic ids: DB_DIR outlives the Streamlit cache, so with random
    # ids every process restart would append the same chunks again and the
    # k=2 retriever could return one chunk twice. Stable ids let the store
    # overwrite existing entries instead.
    ids = [hashlib.sha256(t.encode("utf-8")).hexdigest() for t in texts]

    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=api_key)
    vectordb = Chroma.from_texts(
        texts,
        embeddings,
        ids=ids,
        collection_name="meddoc",
        persist_directory=DB_DIR,
    )

    retriever = vectordb.as_retriever(search_kwargs={"k": 2})
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2, api_key=api_key)
    return retriever, llm


try:
    retriever, llm = build_rag()
except Exception as e:  # top-level boundary: surface any init failure in the UI
    st.error(f"Failed to initialize RAG: {e}")
    st.stop()


# -------------------------
# 4) Load CSVs
# -------------------------
@st.cache_data
def load_customer_tables():
    """Read the three customer CSVs from DATA_DIR.

    Cached so the files are not re-parsed on every Streamlit rerun.

    Returns:
        A ``(credit_df, status_df, pr_df)`` tuple of DataFrames.
    """
    return (
        pd.read_csv(os.path.join(DATA_DIR, "Customer Credit Score.csv")),
        pd.read_csv(os.path.join(DATA_DIR, "Customer Account Status.csv")),
        pd.read_csv(os.path.join(DATA_DIR, "Government PR Status.csv")),
    )


try:
    credit_df, status_df, pr_df = load_customer_tables()
except (OSError, ValueError) as e:  # missing file, or empty/unparseable CSV
    st.error(f"Failed to load customer data: {e}")
    st.stop()


def get_customer_details(customer_id: str) -> tuple:
    """Look up one customer's credit score, account status and PR status.

    Args:
        customer_id: Customer ID as typed by the user; surrounding whitespace
            is ignored.

    Returns:
        A ``(credit_score, account_status, pr_status)`` tuple.

    Raises:
        ValueError: if the ID is not numeric, or is absent from the credit
            score or account status table.
    """
    customer_id = customer_id.strip()
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits that int() cannot parse.
    if not customer_id.isdecimal():
        raise ValueError("Customer ID must be a number (e.g., 1001).")

    id_int = int(customer_id)

    credit_row = credit_df[credit_df["ID"] == id_int]
    status_row = status_df[status_df["ID"] == id_int]
    pr_row = pr_df[pr_df["ID"] == id_int]

    if credit_row.empty:
        raise ValueError(f"Customer ID {customer_id} not found in Credit Score table.")
    if status_row.empty:
        raise ValueError(f"Customer ID {customer_id} not found in Account Status table.")

    credit_score = int(credit_row["Credit Score"].values[0])
    account_status = status_row["Account Status"].values[0]
    # A customer with no row in the PR table is treated as "Singaporean".
    pr_status = pr_row["PR Status"].values[0] if not pr_row.empty else "Singaporean"

    return credit_score, account_status, pr_status


def format_docs(docs):
    """Join the page contents of the retrieved documents with blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


def build_question(customer_id):
    """Build the query string sent to the chain for the given customer.

    Raises:
        ValueError: propagated from get_customer_details.
    """
    credit, status, pr = get_customer_details(customer_id)
    return (
        f"Customer ID: {customer_id}, "
        f"Please provide a recommendation for Credit Score: {credit}, "
        f"Account Status: {status}, PR Status: {pr}"
    )


# -------------------------
# 5) Prompt (paste your TEMPLATE exactly)
# -------------------------
# NOTE(review): step 5 limits the recommendation to "less than 60 words" while
# the Output Summary Format says "less than 30 words". The text is kept
# verbatim here; pick one limit and make both places agree.
TEMPLATE = """
### Role
You are an experienced, detailed, caring, friendly, and warm Senior Loan Officer. Your primary task is to use the provided customer data and internal guidelines to generate a precise loan recommendation.

### MANDATORY INSTRUCTIONS
STRICTLY FOLLOW the steps below. The output must adhere exactly to the required Output Summary Format.

1. Analyze Data & Context: Utilize the customer's profile data from {question} and the financial criteria/loan guidelines from the {context} (retrieved documents).
2. Immediate Eligibility Check: If the PR Status is identified as Not Applicable, Foreigner, or any value indicating non-Permanent Resident status, STOP the analysis. Set the Overall Risk to "High" and the Interest Rate to "N/A." Overwrite the Recommendation with a friendly summary stating the applicant is Not Eligible for Foreigner without PR Status.
3. Determine Overall Risk: Based on the Credit Score, Account Status, and {context} guidelines, determine the applicant's Overall Risk level (e.g., Low, Medium, High).
4. Determine Interest Rate: Use the determined Overall Risk level and the specific interest rate matrix found within the {context} to assign the appropriate Interest Rate.
5. Final Recommendation Summary: Summarize the entire decision (including the basis for the risk and rate) in a single, professional, and warm paragraph less than 60 words.

### Unavailability Protocol
If the necessary guidelines (risk tiers or interest rates) are missing from the {context} to complete steps 3 and 4, you must ONLY respond with the exact sentence: "I do not have the complete information for now and will need further review to provide an accurate recommendation."

### Output Summary Format
Produce the output summary using the exact structure below, filling in the bracketed placeholders:

Output Summary:
Customer ID: [Customer ID]
Credit Score: [Credit Score]
Account Status: [Account Status]
PR Status: [PR Status]
Overall Risk: [Low/Medium/High/Not Applicable]
Interest Rate: [e.g., 4.5% or N/A]
Recommendation: [Final summary less than 30 words]

Query: {question}
Context: {context}
"""

rag_prompt = ChatPromptTemplate.from_template(TEMPLATE)

# The question string is used both as the retrieval query (for "context") and
# passed through unchanged into the prompt (as "question").
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

# -------------------------
# 6) Streamlit UI
# -------------------------
st.title("Loan Processing RAG App")
st.write("Enter a Customer ID to retrieve credit details and generate recommendations.")

customer_id = st.text_input("Enter Customer ID")

if st.button("Run Analysis"):
    if not customer_id.strip():
        st.error("Please enter a valid Customer ID.")
    else:
        try:
            question = build_question(customer_id)
            response = chain.invoke(question)
            st.success("Analysis completed.")
            st.subheader("Output Summary")
            st.text(response)
        except Exception as e:  # UI boundary: show lookup/LLM errors to the user
            st.error(str(e))