Spaces:

yhng2525
/

Loan-Processing-Streamlit

Sleeping

App Files Files Community

Loan-Processing-Streamlit / src /streamlit_app.py

yhng2525

Update src/streamlit_app.py

6577c9d verified about 2 months ago

raw

history blame contribute delete

7.49 kB

	# app.py
	import os
	import streamlit as st
	import pandas as pd
	import gdown

	from langchain_openai import ChatOpenAI, OpenAIEmbeddings
	from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_community.vectorstores import Chroma
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_core.runnables import RunnablePassthrough
	from langchain_core.output_parsers import StrOutputParser

	# -------------------------
	# 1) Config + Secrets
	# -------------------------
	# Page metadata for the Streamlit app.
	st.set_page_config(page_title="Loan Processing RAG App", layout="centered")

	# The key is read from the environment (set in HF Space Secrets). Both
	# spellings are accepted because build_rag() also falls back to
	# OPENAIAPIKEY; checking a single name here would halt the app before that
	# fallback could ever be used.
	OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")
	if not OPENAI_API_KEY:
	    st.error("Missing OPENAI_API_KEY. Please set it in Hugging Face Space → Settings → Secrets.")
	    st.stop()

	DATA_DIR = "datafile"  # local folder the Google Drive files are downloaded into
	DB_DIR = "meddoc_db"  # persisted in the Space runtime filesystem

	# Maps each local file name to the Google Drive file id it is fetched from.
	FILE_LIST = {
	    "Bank Loan Interest Rate Policy.pdf": "1HBIz4oRl6JKkozjzH82ktmuo0k9XC6nI",
	    "Bank Loan Overall Risk Policy.pdf": "1I2GleuGPrl14e9QFE6J7hLiWqf9uyrQo",
	    "Customer Account Status.csv": "101J48wIK0LrmOCvY--Eoja5Z7wTRVavM",
	    "Customer Credit Score.csv": "1-umr8CKLZHFjoqCY9rimhdBscg5NucLJ",
	    "Government PR Status.csv": "1Em_Qg0v9moR108K1WgDMh-b00DebvRs7",
	}

	# -------------------------
	# 2) Download data from GDrive
	# -------------------------
	def ensure_data_downloaded():
	    """Download every file in FILE_LIST into DATA_DIR unless already present.

	    Files that already exist locally are left untouched.

	    Raises:
	        RuntimeError: If a download finishes without producing the
	            expected local file.
	    """
	    os.makedirs(DATA_DIR, exist_ok=True)
	    for name, fid in FILE_LIST.items():
	        out_path = os.path.join(DATA_DIR, name)
	        if os.path.exists(out_path):
	            continue
	        url = f"https://drive.google.com/uc?id={fid}"
	        gdown.download(url, out_path, quiet=True)
	        # The download runs quietly and its outcome is otherwise ignored, so
	        # confirm the file really exists instead of letting a later reader
	        # fail with an unrelated-looking "file not found" error.
	        if not os.path.exists(out_path):
	            raise RuntimeError(
	                f"Failed to download '{name}' from Google Drive (file id {fid})."
	            )

	# -------------------------
	# 3) Build RAG resources once
	# -------------------------
	@st.cache_resource
	def build_rag():
	    """Build the retriever and chat model used by the RAG chain.

	    Downloads the data files, loads and chunks the PDFs found in DATA_DIR,
	    embeds the chunks into the "meddoc" Chroma collection persisted under
	    DB_DIR, and creates the chat model. Wrapped in st.cache_resource so the
	    result is reused across Streamlit reruns.

	    Returns:
	        A ``(retriever, llm)`` tuple: a retriever returning the top 2 chunks
	        and the ChatOpenAI model.

	    Raises:
	        RuntimeError: If no OpenAI API key is configured, or if the PDFs
	            yield no text to index.
	    """
	    import hashlib

	    api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")
	    if not api_key:
	        raise RuntimeError(
	            "Missing OpenAI API key. Set it in HF Space Secrets as OPENAI_API_KEY."
	        )

	    ensure_data_downloaded()

	    loader = DirectoryLoader(DATA_DIR, glob="*.pdf", loader_cls=PyPDFLoader)
	    documents = loader.load()

	    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
	    chunks = splitter.split_documents(documents)

	    # Drop repeated chunk texts (order preserved) so every id below is unique.
	    texts = list(dict.fromkeys(chunk.page_content for chunk in chunks))
	    if not texts:
	        raise RuntimeError(
	            f"No PDF text found in '{DATA_DIR}'; cannot build the vector store."
	        )

	    # Content-derived ids make indexing idempotent: DB_DIR is persisted, so
	    # without stable ids every fresh process would append the same chunks
	    # again and the k=2 retriever could return the same passage twice.
	    ids = [hashlib.sha256(text.encode("utf-8")).hexdigest() for text in texts]

	    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=api_key)

	    vectordb = Chroma.from_texts(
	        texts,
	        embeddings,
	        ids=ids,
	        collection_name="meddoc",
	        persist_directory=DB_DIR,
	    )
	    retriever = vectordb.as_retriever(search_kwargs={"k": 2})

	    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2, api_key=api_key)
	    return retriever, llm

	# Build (or reuse) the RAG resources; on any failure show the reason in the
	# page and halt the script instead of surfacing a traceback.
	try:
	    rag_resources = build_rag()
	    retriever, llm = rag_resources
	except Exception as init_error:
	    st.error(f"Failed to initialize RAG: {init_error}")
	    st.stop()

	# -------------------------
	# 4) Load CSVs
	# -------------------------
	# The three customer tables are read from DATA_DIR. A missing or malformed
	# file is reported in the page and the script is halted, matching how RAG
	# initialization failures are handled, instead of raising a raw traceback.
	try:
	    credit_df = pd.read_csv(os.path.join(DATA_DIR, "Customer Credit Score.csv"))
	    status_df = pd.read_csv(os.path.join(DATA_DIR, "Customer Account Status.csv"))
	    pr_df = pd.read_csv(os.path.join(DATA_DIR, "Government PR Status.csv"))
	except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as load_error:
	    st.error(f"Failed to load customer data: {load_error}")
	    st.stop()

	def get_customer_details(customer_id: str):
	    """Look up one customer's credit score, account status and PR status.

	    Args:
	        customer_id: Customer ID as typed by the user; surrounding
	            whitespace is ignored.

	    Returns:
	        A ``(credit_score, account_status, pr_status)`` tuple. ``pr_status``
	        falls back to "Singaporean" when the ID has no row in the PR table.

	    Raises:
	        ValueError: If the ID is not a plain number, or is missing from the
	            Credit Score or Account Status table.
	    """
	    customer_id = customer_id.strip()
	    # isdecimal() rather than isdigit(): isdigit() also accepts characters
	    # such as "²" that int() cannot parse, which would leak a raw
	    # "invalid literal" error instead of the friendly message below.
	    if not customer_id.isdecimal():
	        raise ValueError("Customer ID must be a number (e.g., 1001).")
	    id_int = int(customer_id)

	    # NOTE(review): assumes the "ID" columns were parsed as integers — confirm.
	    credit_row = credit_df[credit_df["ID"] == id_int]
	    status_row = status_df[status_df["ID"] == id_int]
	    pr_row = pr_df[pr_df["ID"] == id_int]

	    if credit_row.empty:
	        raise ValueError(f"Customer ID {customer_id} not found in Credit Score table.")
	    if status_row.empty:
	        raise ValueError(f"Customer ID {customer_id} not found in Account Status table.")

	    credit_score = int(credit_row["Credit Score"].values[0])
	    account_status = status_row["Account Status"].values[0]
	    # A customer absent from the PR table is reported as "Singaporean".
	    pr_status = pr_row["PR Status"].values[0] if not pr_row.empty else "Singaporean"
	    return credit_score, account_status, pr_status

	def format_docs(docs):
	    """Return the page_content of each document, separated by blank lines."""
	    separator = "\n\n"
	    page_texts = [entry.page_content for entry in docs]
	    return separator.join(page_texts)

	def build_question(customer_id):
	    """Compose the one-line query describing a customer's profile.

	    Fetches the customer's details via get_customer_details() (which raises
	    ValueError for invalid or unknown IDs) and embeds them in the query text.
	    """
	    credit_score, account_status, pr_status = get_customer_details(customer_id)
	    id_part = f"Customer ID: {customer_id}, "
	    request_part = f"Please provide a recommendation for Credit Score: {credit_score}, "
	    profile_part = f"Account Status: {account_status}, PR Status: {pr_status}"
	    return id_part + request_part + profile_part

	# -------------------------
	# 5) Prompt (paste your TEMPLATE exactly)
	# -------------------------
	# Prompt text with two placeholders: {question} (the customer profile line)
	# and {context} (the retrieved policy text). The text is sent to the model
	# verbatim, so any edit here changes runtime behavior.
	# NOTE(review): step 5 asks for a summary under 60 words, while the Output
	# Summary Format asks for under 30 words — confirm which limit is intended.
	# NOTE(review): the eligibility check sets Overall Risk to "High", while the
	# output format also lists "Not Applicable" as a value — confirm.
	TEMPLATE = """
	### Role
	You are an experienced, detailed, caring, friendly, and warm Senior Loan Officer. Your primary task is to use the provided customer data and internal guidelines to generate a precise loan recommendation.

	### MANDATORY INSTRUCTIONS

	STRICTLY FOLLOW the steps below. The output must adhere exactly to the required Output Summary Format.

	1. Analyze Data & Context: Utilize the customer's profile data from {question} and the financial criteria/loan guidelines from the {context} (retrieved documents).
	2. Immediate Eligibility Check:
	If the PR Status is identified as Not Applicable, Foreigner, or any value indicating non-Permanent Resident status, STOP the analysis. Set the Overall Risk to "High" and the Interest Rate to "N/A." Overwrite the Recommendation with a friendly summary stating the applicant is Not Eligible for Foreigner without PR Status.
	3. Determine Overall Risk: Based on the Credit Score, Account Status, and {context} guidelines, determine the applicant's Overall Risk level (e.g., Low, Medium, High).
	4. Determine Interest Rate: Use the determined Overall Risk level and the specific interest rate matrix found within the {context} to assign the appropriate Interest Rate.
	5. Final Recommendation Summary: Summarize the entire decision (including the basis for the risk and rate) in a single, professional, and warm paragraph less than 60 words.

	### Unavailability Protocol

	If the necessary guidelines (risk tiers or interest rates) are missing from the {context} to complete steps 3 and 4, you must ONLY respond with the exact sentence: "I do not have the complete information for now and will need further review to provide an accurate recommendation."

	### Output Summary Format

	Produce the output summary using the exact structure below, filling in the bracketed placeholders:

	Output Summary:
	Customer ID: [Customer ID]
	Credit Score: [Credit Score]
	Account Status: [Account Status]
	PR Status: [PR Status]
	Overall Risk: [Low/Medium/High/Not Applicable]
	Interest Rate: [e.g., 4.5% or N/A]
	Recommendation: [Final summary less than 30 words]

	Query:
	{question}

	Context:
	{context}
	"""

	# Chat prompt whose {context} and {question} slots come from TEMPLATE.
	rag_prompt = ChatPromptTemplate.from_template(TEMPLATE)

	# Pipeline: the incoming question is passed through unchanged and also sent
	# to the retriever, whose documents are joined by format_docs to form the
	# context; the filled prompt goes to the model and the reply is parsed to a
	# plain string. The pipe operators must be bare "|" — an escaped "\|" is a
	# syntax error in Python.
	chain = (
	    {"context": retriever | format_docs, "question": RunnablePassthrough()}
	    | rag_prompt
	    | llm
	    | StrOutputParser()
	)

	# -------------------------
	# 6) Streamlit UI
	# -------------------------
	st.title("Loan Processing RAG App")
	st.write("Enter a Customer ID to retrieve credit details and generate recommendations.")

	customer_id = st.text_input("Enter Customer ID")
	run_requested = st.button("Run Analysis")

	# Nothing happens until the button is pressed; a blank ID is rejected up
	# front, otherwise the question is built and sent through the chain.
	if run_requested and not customer_id.strip():
	    st.error("Please enter a valid Customer ID.")
	elif run_requested:
	    try:
	        customer_question = build_question(customer_id)
	        analysis_text = chain.invoke(customer_question)
	        st.success("Analysis completed.")
	        st.subheader("Output Summary")
	        st.text(analysis_text)
	    except Exception as run_error:
	        # Lookup/validation errors and chain failures are all shown in-page.
	        st.error(str(run_error))