# Loan-Processing-Streamlit / src/streamlit_app.py
# yhng2525's picture
# Update src/streamlit_app.py
# 6577c9d verified
# app.py
import os
import streamlit as st
import pandas as pd
import gdown
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# -------------------------
# 1) Config + Secrets
# -------------------------
# Page title and layout for the Streamlit app.
st.set_page_config(page_title="Loan Processing RAG App", layout="centered")

# Accept the same two secret names that build_rag() reads. Checking only
# OPENAI_API_KEY here would stop the app before build_rag() ever gets the
# chance to use the OPENAIAPIKEY fallback.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")  # set in HF Space Secrets
if not OPENAI_API_KEY:
    st.error("Missing OPENAI_API_KEY. Please set it in Hugging Face Space → Settings → Secrets.")
    st.stop()
# Local directory that the Google Drive files are downloaded into and that the
# PDF loader and CSV readers read from.
DATA_DIR = "datafile"
# Directory passed to Chroma as persist_directory for the vector store.
DB_DIR = "meddoc_db" # persisted in the Space runtime filesystem
# Maps each local file name (inside DATA_DIR) to the Google Drive file id it is
# downloaded from.
FILE_LIST = {
    "Bank Loan Interest Rate Policy.pdf": "1HBIz4oRl6JKkozjzH82ktmuo0k9XC6nI",
    "Bank Loan Overall Risk Policy.pdf": "1I2GleuGPrl14e9QFE6J7hLiWqf9uyrQo",
    "Customer Account Status.csv": "101J48wIK0LrmOCvY--Eoja5Z7wTRVavM",
    "Customer Credit Score.csv": "1-umr8CKLZHFjoqCY9rimhdBscg5NucLJ",
    "Government PR Status.csv": "1Em_Qg0v9moR108K1WgDMh-b00DebvRs7"
}
# -------------------------
# 2) Download data from GDrive
# -------------------------
def ensure_data_downloaded():
    """Download every file in FILE_LIST into DATA_DIR unless it already exists.

    Files that are already present locally are left untouched, so repeated
    calls only fetch what is missing.

    Raises:
        RuntimeError: If a file is still absent after its download attempt.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    for name, fid in FILE_LIST.items():
        out_path = os.path.join(DATA_DIR, name)
        if os.path.exists(out_path):
            continue
        url = f"https://drive.google.com/uc?id={fid}"
        gdown.download(url, out_path, quiet=True)
        # quiet=True suppresses gdown's own output, so verify the result here
        # instead of letting a later PDF/CSV loader fail with a less helpful
        # error about a missing file.
        if not os.path.exists(out_path):
            raise RuntimeError(
                f"Failed to download '{name}' from Google Drive (file id {fid})."
            )
# -------------------------
# 3) Build RAG resources once
# -------------------------
@st.cache_resource
def build_rag():
    """Build the retriever and chat model used by the RAG chain.

    Downloads the source files if needed, loads every PDF in DATA_DIR, splits
    the pages into chunks of up to 1000 characters, embeds the chunk texts into
    the "meddoc" Chroma collection persisted under DB_DIR, and creates the
    chat model. Wrapped in st.cache_resource so the work is shared across
    Streamlit reruns.

    Returns:
        tuple: ``(retriever, llm)`` where the retriever returns the 2 closest
        chunks and ``llm`` is the ChatOpenAI model.

    Raises:
        RuntimeError: If no OpenAI API key is configured, or if no text could
            be extracted from the PDF files.
    """
    import hashlib  # local import: only needed here for stable chunk ids

    api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")
    if not api_key:
        # Name the same secret the startup check asks for.
        raise RuntimeError(
            "Missing OpenAI API key. Set it in HF Space Secrets as OPENAI_API_KEY."
        )
    ensure_data_downloaded()

    loader = DirectoryLoader(DATA_DIR, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)

    # Key every distinct chunk text by a hash of its content. DB_DIR is
    # persisted, so inserting with fresh random ids on each rebuild would store
    # the same chunk again and again, and a k=2 search could then return two
    # copies of one chunk. Stable ids make a rebuild overwrite existing entries.
    # NOTE(review): entries written by earlier runs under random ids remain in
    # an existing DB_DIR until that directory is cleared.
    texts_by_id = {}
    for chunk in chunks:
        text = chunk.page_content
        chunk_id = hashlib.sha256(text.encode("utf-8")).hexdigest()
        texts_by_id.setdefault(chunk_id, text)
    if not texts_by_id:
        raise RuntimeError(
            f"No text could be extracted from the PDF files in '{DATA_DIR}'."
        )

    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=api_key)
    vectordb = Chroma.from_texts(
        list(texts_by_id.values()),
        embeddings,
        ids=list(texts_by_id.keys()),
        collection_name="meddoc",
        persist_directory=DB_DIR,
    )
    retriever = vectordb.as_retriever(search_kwargs={"k": 2})
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2, api_key=api_key)
    return retriever, llm
# Build (or fetch from the Streamlit resource cache) the retriever and chat
# model; on any failure show the error in the page and halt this script run.
try:
    rag_resources = build_rag()
    retriever, llm = rag_resources
except Exception as e:
    failure_message = f"Failed to initialize RAG: {e}"
    st.error(failure_message)
    st.stop()
# -------------------------
# 4) Load CSVs
# -------------------------
# Load the three customer tables that get_customer_details() queries.
# A missing, empty or malformed file is reported in the page (the same way a
# RAG initialisation failure is) instead of surfacing as a raw traceback.
# OSError covers missing/unreadable files; pandas' parser and empty-data
# errors, as well as decoding errors, are ValueError subclasses.
try:
    credit_df = pd.read_csv(os.path.join(DATA_DIR, "Customer Credit Score.csv"))
    status_df = pd.read_csv(os.path.join(DATA_DIR, "Customer Account Status.csv"))
    pr_df = pd.read_csv(os.path.join(DATA_DIR, "Government PR Status.csv"))
except (OSError, ValueError) as e:
    st.error(f"Failed to load customer data: {e}")
    st.stop()
def get_customer_details(customer_id: str):
    """Look up a customer's credit score, account status and PR status.

    Args:
        customer_id: Customer ID as entered by the user. Surrounding
            whitespace is ignored.

    Returns:
        tuple: ``(credit_score, account_status, pr_status)``. ``pr_status``
        falls back to ``"Singaporean"`` when the ID has no row in the PR
        Status table.

    Raises:
        ValueError: If the ID is not a plain decimal number, if it is missing
            from the Credit Score or Account Status table, or if its credit
            score is empty.
    """
    customer_id = customer_id.strip()
    # str.isdecimal() accepts exactly the digit characters int() can parse.
    # str.isdigit() additionally accepts characters such as "²", which then
    # make int() fail with an unrelated "invalid literal" message.
    if not customer_id.isdecimal():
        raise ValueError("Customer ID must be a number (e.g., 1001).")
    id_int = int(customer_id)

    credit_row = credit_df[credit_df["ID"] == id_int]
    status_row = status_df[status_df["ID"] == id_int]
    pr_row = pr_df[pr_df["ID"] == id_int]

    if credit_row.empty:
        raise ValueError(f"Customer ID {customer_id} not found in Credit Score table.")
    if status_row.empty:
        raise ValueError(f"Customer ID {customer_id} not found in Account Status table.")

    raw_score = credit_row["Credit Score"].values[0]
    # An empty CSV cell is read as NaN, which int() cannot convert; report it
    # with a message the user can act on.
    if pd.isna(raw_score):
        raise ValueError(f"Customer ID {customer_id} has no credit score on record.")
    credit_score = int(raw_score)
    account_status = status_row["Account Status"].values[0]
    # Customers absent from the PR Status table are treated as Singaporean.
    pr_status = pr_row["PR Status"].values[0] if not pr_row.empty else "Singaporean"
    return credit_score, account_status, pr_status
def format_docs(docs):
    """Join the page_content of each retrieved document, separated by a blank line."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)
def build_question(customer_id):
    """Compose the question text sent to the chain for one customer.

    Looks the customer up with get_customer_details() and embeds the ID,
    credit score, account status and PR status into a single sentence.
    Any ValueError raised by the lookup propagates to the caller.
    """
    details = get_customer_details(customer_id)
    credit, status, pr = details
    sentence_parts = [
        f"Customer ID: {customer_id}, ",
        f"Please provide a recommendation for Credit Score: {credit}, ",
        f"Account Status: {status}, PR Status: {pr}",
    ]
    return "".join(sentence_parts)
# -------------------------
# 5) Prompt (paste your TEMPLATE exactly)
# -------------------------
# Prompt text for the loan-recommendation chain. The {question} placeholder is
# filled with the customer summary string passed to the chain and {context}
# with the text of the retrieved policy chunks.
# NOTE(review): step 5 asks for a paragraph of "less than 60 words" while the
# Output Summary Format asks for "less than 30 words" — confirm which limit is
# intended before changing the prompt text.
TEMPLATE = """
### Role
You are an experienced, detailed, caring, friendly, and warm Senior Loan Officer. Your primary task is to use the provided customer data and internal guidelines to generate a precise loan recommendation.
### MANDATORY INSTRUCTIONS
STRICTLY FOLLOW the steps below. The output must adhere exactly to the required Output Summary Format.
1. Analyze Data & Context: Utilize the customer's profile data from {question} and the financial criteria/loan guidelines from the {context} (retrieved documents).
2. Immediate Eligibility Check:
If the PR Status is identified as Not Applicable, Foreigner, or any value indicating non-Permanent Resident status, STOP the analysis. Set the Overall Risk to "High" and the Interest Rate to "N/A." Overwrite the Recommendation with a friendly summary stating the applicant is Not Eligible for Foreigner without PR Status.
3. Determine Overall Risk: Based on the Credit Score, Account Status, and {context} guidelines, determine the applicant's Overall Risk level (e.g., Low, Medium, High).
4. Determine Interest Rate: Use the determined Overall Risk level and the specific interest rate matrix found within the {context} to assign the appropriate Interest Rate.
5. Final Recommendation Summary: Summarize the entire decision (including the basis for the risk and rate) in a single, professional, and warm paragraph less than 60 words.
### Unavailability Protocol
If the necessary guidelines (risk tiers or interest rates) are missing from the {context} to complete steps 3 and 4, you must ONLY respond with the exact sentence: "I do not have the complete information for now and will need further review to provide an accurate recommendation."
### Output Summary Format
Produce the output summary using the exact structure below, filling in the bracketed placeholders:
Output Summary:
Customer ID: [Customer ID]
Credit Score: [Credit Score]
Account Status: [Account Status]
PR Status: [PR Status]
Overall Risk: [Low/Medium/High/Not Applicable]
Interest Rate: [e.g., 4.5% or N/A]
Recommendation: [Final summary less than 30 words]
Query:
{question}
Context:
{context}
"""
# Prompt object built from the template text above.
rag_prompt = ChatPromptTemplate.from_template(TEMPLATE)

# Inputs for the prompt: "context" is the retrieved chunks joined into one
# string, "question" is the chain input passed through unchanged.
_retrieve_context = retriever | format_docs
_prompt_inputs = {"context": _retrieve_context, "question": RunnablePassthrough()}

# Full pipeline: gather inputs -> fill prompt -> call the model -> plain string.
chain = _prompt_inputs | rag_prompt | llm | StrOutputParser()
# -------------------------
# 6) Streamlit UI
# -------------------------
# Page header and the single input field.
st.title("Loan Processing RAG App")
st.write("Enter a Customer ID to retrieve credit details and generate recommendations.")
customer_id = st.text_input("Enter Customer ID")

# The button is rendered on every run; the analysis only happens on the run
# in which it was clicked.
run_requested = st.button("Run Analysis")
id_is_blank = not customer_id.strip()

if run_requested and id_is_blank:
    st.error("Please enter a valid Customer ID.")
elif run_requested:
    try:
        # Look up the customer, then ask the RAG chain for a recommendation.
        question = build_question(customer_id)
        response = chain.invoke(question)
        st.success("Analysis completed.")
        st.subheader("Output Summary")
        st.text(response)
    except Exception as exc:
        # Lookup/validation errors and chain failures are shown as-is.
        st.error(str(exc))