Spaces:
Sleeping
Sleeping
| # app.py | |
| import os | |
| import streamlit as st | |
| import pandas as pd | |
| import gdown | |
| from langchain_openai import ChatOpenAI, OpenAIEmbeddings | |
| from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
| # ------------------------- | |
| # 1) Config + Secrets | |
| # ------------------------- | |
st.set_page_config(page_title="Loan Processing RAG App", layout="centered")

# The key is read from the Space secrets. Both spellings are accepted here
# because build_rag() falls back to OPENAIAPIKEY as well; checking only
# OPENAI_API_KEY would stop the app before that fallback could ever be used.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")
if not OPENAI_API_KEY:
    st.error("Missing OPENAI_API_KEY. Please set it in Hugging Face Space → Settings → Secrets.")
    st.stop()

# Local folder that receives the downloaded PDFs and CSVs.
DATA_DIR = "datafile"
# Folder used as the Chroma persist directory (Space runtime filesystem).
DB_DIR = "meddoc_db"

# Target file name -> Google Drive file id.
FILE_LIST = {
    "Bank Loan Interest Rate Policy.pdf": "1HBIz4oRl6JKkozjzH82ktmuo0k9XC6nI",
    "Bank Loan Overall Risk Policy.pdf": "1I2GleuGPrl14e9QFE6J7hLiWqf9uyrQo",
    "Customer Account Status.csv": "101J48wIK0LrmOCvY--Eoja5Z7wTRVavM",
    "Customer Credit Score.csv": "1-umr8CKLZHFjoqCY9rimhdBscg5NucLJ",
    "Government PR Status.csv": "1Em_Qg0v9moR108K1WgDMh-b00DebvRs7",
}
| # ------------------------- | |
| # 2) Download data from GDrive | |
| # ------------------------- | |
def ensure_data_downloaded():
    """Download every file in FILE_LIST into DATA_DIR unless it already exists.

    Files are fetched from Google Drive by file id using gdown. A file that
    is already present at its target path is not downloaded again.

    Raises:
        RuntimeError: if a download does not produce the expected file.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    for name, fid in FILE_LIST.items():
        out_path = os.path.join(DATA_DIR, name)
        if os.path.exists(out_path):
            continue
        url = f"https://drive.google.com/uc?id={fid}"
        result = gdown.download(url, out_path, quiet=True)
        # Depending on the gdown version a failed download may return None
        # instead of raising. Fail here with the file name rather than later
        # with an unrelated "file not found" from the PDF/CSV readers.
        if result is None or not os.path.exists(out_path):
            raise RuntimeError(f"Failed to download '{name}' from Google Drive.")
| # ------------------------- | |
| # 3) Build RAG resources once | |
| # ------------------------- | |
@st.cache_resource
def build_rag():
    """Create the retriever and chat model used by the RAG chain.

    The function is wrapped in ``st.cache_resource`` so the work happens once
    per server process instead of on every Streamlit rerun. Without caching,
    each interaction would re-embed the PDFs and append the same chunks to the
    persisted collection again, and the top-k results would fill up with
    duplicates.

    Returns:
        A ``(retriever, llm)`` tuple: a Chroma retriever that returns the two
        closest chunks, and a ``gpt-4o-mini`` chat model.

    Raises:
        RuntimeError: if no OpenAI API key is configured, or if the PDFs
            yield no text to index.
    """
    api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAIAPIKEY")
    if not api_key:
        raise RuntimeError(
            "Missing OpenAI API key. Set it in HF Space Secrets as OPENAI_API_KEY."
        )

    ensure_data_downloaded()

    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=api_key)
    vectordb = Chroma(
        collection_name="meddoc",
        embedding_function=embeddings,
        persist_directory=DB_DIR,
    )

    # Only embed when the persisted collection is still empty. A collection
    # left in DB_DIR by an earlier run is reused instead of being appended to.
    if not vectordb.get(limit=1)["ids"]:
        loader = DirectoryLoader(DATA_DIR, glob="*.pdf", loader_cls=PyPDFLoader)
        documents = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        chunks = splitter.split_documents(documents)
        if not chunks:
            raise RuntimeError(f"No PDF text found in '{DATA_DIR}' to index.")
        vectordb.add_texts([c.page_content for c in chunks])

    retriever = vectordb.as_retriever(search_kwargs={"k": 2})
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2, api_key=api_key)
    return retriever, llm
# Build the retriever and model; on failure show the reason and halt the script.
try:
    retriever, llm = build_rag()
except Exception as e:
    st.error(f"Failed to initialize RAG: {e}")
    st.stop()

# -------------------------
# 4) Load CSVs
# -------------------------
# The CSVs are read from DATA_DIR. A missing or malformed file is reported in
# the page instead of surfacing as a raw traceback. pandas' parser errors
# derive from ValueError and a missing file raises an OSError subclass.
try:
    credit_df = pd.read_csv(os.path.join(DATA_DIR, "Customer Credit Score.csv"))
    status_df = pd.read_csv(os.path.join(DATA_DIR, "Customer Account Status.csv"))
    pr_df = pd.read_csv(os.path.join(DATA_DIR, "Government PR Status.csv"))
except (OSError, ValueError) as e:
    st.error(f"Failed to load customer data: {e}")
    st.stop()
def get_customer_details(customer_id: str):
    """Look up a customer's credit score, account status and PR status.

    Args:
        customer_id: Customer ID as entered by the user. Surrounding
            whitespace is ignored.

    Returns:
        A ``(credit_score, account_status, pr_status)`` tuple. ``credit_score``
        is an ``int``. ``pr_status`` is ``"Singaporean"`` when the ID has no
        row in the PR table.

    Raises:
        ValueError: if the ID is not a decimal number, is absent from the
            credit-score or account-status table, or has no credit score value.
    """
    customer_id = customer_id.strip()
    # isdecimal() instead of isdigit(): isdigit() also accepts characters such
    # as "²", which int() cannot parse and would fail with an opaque message.
    if not customer_id.isdecimal():
        raise ValueError("Customer ID must be a number (e.g., 1001).")
    id_int = int(customer_id)

    credit_row = credit_df[credit_df["ID"] == id_int]
    status_row = status_df[status_df["ID"] == id_int]
    pr_row = pr_df[pr_df["ID"] == id_int]

    if credit_row.empty:
        raise ValueError(f"Customer ID {customer_id} not found in Credit Score table.")
    if status_row.empty:
        raise ValueError(f"Customer ID {customer_id} not found in Account Status table.")

    raw_score = credit_row["Credit Score"].values[0]
    # An empty cell is read as NaN, and int(NaN) fails with an unhelpful error.
    if pd.isna(raw_score):
        raise ValueError(f"Credit Score for Customer ID {customer_id} is missing.")
    credit_score = int(raw_score)

    account_status = status_row["Account Status"].values[0]
    # Customers without a row in the PR table are treated as "Singaporean".
    pr_status = pr_row["PR Status"].values[0] if not pr_row.empty else "Singaporean"
    return credit_score, account_status, pr_status
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    The ``page_content`` of each document is kept in order, with a blank
    line between consecutive documents.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)
def build_question(customer_id):
    """Build the query text sent to the RAG chain for one customer.

    Args:
        customer_id: Customer ID as entered by the user.

    Returns:
        A single-line string holding the customer ID, credit score, account
        status and PR status.

    Raises:
        ValueError: propagated from ``get_customer_details`` when the ID is
            invalid or unknown.
    """
    # Trim once here so the ID shown in the question matches the ID that
    # get_customer_details() actually looks up (it strips its input too).
    customer_id = customer_id.strip()
    credit, status, pr = get_customer_details(customer_id)
    return (
        f"Customer ID: {customer_id}, "
        f"Please provide a recommendation for Credit Score: {credit}, "
        f"Account Status: {status}, PR Status: {pr}"
    )
| # ------------------------- | |
| # 5) Prompt (paste your TEMPLATE exactly) | |
| # ------------------------- | |
# Prompt for the loan recommendation.
# {question} and {context} each appear exactly once, in the Query / Context
# sections at the end. Every occurrence of a placeholder is substituted when
# the prompt is rendered, so naming them inside the instruction sentences would
# paste the full query or retrieved text into the middle of those sentences.
# NOTE(review): step 5 asks for fewer than 60 words while the Output Summary
# Format asks for fewer than 30 — confirm which limit is intended.
TEMPLATE = """
### Role
You are an experienced, detailed, caring, friendly, and warm Senior Loan Officer. Your primary task is to use the provided customer data and internal guidelines to generate a precise loan recommendation.
### MANDATORY INSTRUCTIONS
STRICTLY FOLLOW the steps below. The output must adhere exactly to the required Output Summary Format.
1. Analyze Data & Context: Utilize the customer's profile data from the Query section and the financial criteria/loan guidelines from the Context section (retrieved documents), both provided at the end of this prompt.
2. Immediate Eligibility Check:
If the PR Status is identified as Not Applicable, Foreigner, or any value indicating non-Permanent Resident status, STOP the analysis. Set the Overall Risk to "High" and the Interest Rate to "N/A." Overwrite the Recommendation with a friendly summary stating the applicant is Not Eligible for Foreigner without PR Status.
3. Determine Overall Risk: Based on the Credit Score, Account Status, and the guidelines in the Context section, determine the applicant's Overall Risk level (e.g., Low, Medium, High).
4. Determine Interest Rate: Use the determined Overall Risk level and the specific interest rate matrix found within the Context section to assign the appropriate Interest Rate.
5. Final Recommendation Summary: Summarize the entire decision (including the basis for the risk and rate) in a single, professional, and warm paragraph less than 60 words.
### Unavailability Protocol
If the necessary guidelines (risk tiers or interest rates) are missing from the Context section to complete steps 3 and 4, you must ONLY respond with the exact sentence: "I do not have the complete information for now and will need further review to provide an accurate recommendation."
### Output Summary Format
Produce the output summary using the exact structure below, filling in the bracketed placeholders:
Output Summary:
Customer ID: [Customer ID]
Credit Score: [Credit Score]
Account Status: [Account Status]
PR Status: [PR Status]
Overall Risk: [Low/Medium/High/Not Applicable]
Interest Rate: [e.g., 4.5% or N/A]
Recommendation: [Final summary less than 30 words]
Query:
{question}
Context:
{context}
"""
# Prompt object built from TEMPLATE.
rag_prompt = ChatPromptTemplate.from_template(TEMPLATE)

# First stage: the incoming query is sent through the retriever and formatted
# into "context", and is also forwarded unchanged as "question".
_chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: inputs -> prompt -> chat model -> plain string.
chain = _chain_inputs | rag_prompt | llm | StrOutputParser()
| # ------------------------- | |
| # 6) Streamlit UI | |
| # ------------------------- | |
st.title("Loan Processing RAG App")
st.write("Enter a Customer ID to retrieve credit details and generate recommendations.")

customer_id = st.text_input("Enter Customer ID")
run_requested = st.button("Run Analysis")

if run_requested and not customer_id.strip():
    # Button pressed with a blank ID.
    st.error("Please enter a valid Customer ID.")
elif run_requested:
    # Any failure (bad ID, lookup miss, model error) is shown as an error box.
    try:
        response = chain.invoke(build_question(customer_id))
        st.success("Analysis completed.")
        st.subheader("Output Summary")
        st.text(response)
    except Exception as exc:
        st.error(str(exc))