Rogerleong committed on
Commit
650810f
·
verified ·
1 Parent(s): 7d94ab1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +257 -295
app.py CHANGED
@@ -1,18 +1,28 @@
1
  import os
2
  import re
3
  import textwrap
 
4
  import pandas as pd
5
  import gradio as gr
6
 
7
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 
8
  from langchain_core.documents import Document
9
  from langchain_core.prompts import PromptTemplate
10
- from langchain_community.vectorstores import FAISS
11
 
12
 
13
- # =========================================================
14
- # Pretty output helpers (same idea as your Step 6)
15
- # =========================================================
 
 
 
 
 
 
 
 
 
16
  def one_sentence_per_line(text: str, width: int = 110) -> str:
17
  if text is None:
18
  return ""
@@ -23,8 +33,7 @@ def one_sentence_per_line(text: str, width: int = 110) -> str:
23
  prefix, body = prefix_match.group(1), prefix_match.group(2)
24
  wrapped = textwrap.wrap(body, width=max(20, width - len(prefix))) or [""]
25
  return [prefix + wrapped[0]] + [(" " * len(prefix)) + w for w in wrapped[1:]]
26
- wrapped = textwrap.wrap(line, width=width) or [""]
27
- return wrapped
28
 
29
  out_lines = []
30
  for raw_line in str(text).splitlines():
@@ -35,10 +44,8 @@ def one_sentence_per_line(text: str, width: int = 110) -> str:
35
  parts = re.split(r"(?<=[.!?])\s+", line)
36
  for part in parts:
37
  part = part.strip()
38
- if not part:
39
- continue
40
- out_lines.extend(_wrap_line(part))
41
-
42
  return "\n".join(out_lines)
43
 
44
 
@@ -55,35 +62,30 @@ def enforce_third_person(text: str, customer_name: str) -> str:
55
  return text
56
 
57
 
58
- def format_customer_profile(profile) -> str:
59
- if profile is None:
60
  return ""
61
-
62
- if isinstance(profile, str):
63
- return profile
64
-
65
  d = dict(profile)
66
- preferred_order = ["ID", "Name", "Email", "Credit_Score", "Nationality", "Account_Status", "PR_Status"]
 
67
 
68
  nat = str(d.get("Nationality", "")).strip().lower()
69
  if nat == "singaporean":
70
  d.pop("PR_Status", None)
71
 
72
  lines = []
73
- for k in preferred_order:
74
  if k in d:
75
  lines.append(f"{k}: {d.get(k)}")
76
-
77
  for k in sorted(d.keys()):
78
- if k not in preferred_order:
79
  lines.append(f"{k}: {d.get(k)}")
80
-
81
  return "\n".join(lines)
82
 
83
 
84
- # =========================================================
85
- # Load CSV (same idea as Step 3)
86
- # =========================================================
87
  def load_customer_csv(csv_path: str) -> pd.DataFrame:
88
  df_all = pd.read_csv(csv_path)
89
  df_all.columns = [c.strip() for c in df_all.columns]
@@ -124,10 +126,9 @@ def build_mock_systems(df_all: pd.DataFrame):
124
  return df_credit, df_account, df_gov
125
 
126
 
127
- def get_customer_profile(customer_id: str, df_credit, df_account, df_gov):
128
  customer_id = str(customer_id).strip()
129
-
130
- credit_rec = df_credit[df_credit["ID"].astype(str) == customer_id]
131
  if credit_rec.empty:
132
  return None
133
 
@@ -135,13 +136,13 @@ def get_customer_profile(customer_id: str, df_credit, df_account, df_gov):
135
  email = credit_rec.iloc[0]["Email"]
136
  credit_score = int(credit_rec.iloc[0]["Credit_Score"])
137
 
138
- acct_rec = df_account[df_account["ID"].astype(str) == customer_id]
139
  nationality = acct_rec.iloc[0]["Nationality"] if not acct_rec.empty else None
140
  account_status = acct_rec.iloc[0]["Account_Status"] if not acct_rec.empty else None
141
 
142
  pr_status = None
143
  if nationality and str(nationality).strip().lower() == "non-singaporean":
144
- gov_rec = df_gov[df_gov["ID"].astype(str) == customer_id]
145
  pr_status = bool(gov_rec.iloc[0]["PR_Status"]) if not gov_rec.empty else None
146
 
147
  return {
@@ -155,9 +156,9 @@ def get_customer_profile(customer_id: str, df_credit, df_account, df_gov):
155
  }
156
 
157
 
158
- # =========================================================
159
- # PDFs -> text -> parse rules (same as Step 4)
160
- # =========================================================
161
  def extract_pdf_text(pdf_path: str) -> str:
162
  from pypdf import PdfReader
163
  reader = PdfReader(pdf_path)
@@ -167,80 +168,63 @@ def extract_pdf_text(pdf_path: str) -> str:
167
  return "\n".join(pages)
168
 
169
 
170
- def load_policies(risk_pdf_path: str, interest_pdf_path: str):
171
- risk_policy_text = extract_pdf_text(risk_pdf_path)
172
- interest_policy_text = extract_pdf_text(interest_pdf_path)
173
-
174
  rate_matches = re.findall(
175
  r"\b(Low|Medium|High)\b\s+([0-9]+\.[0-9]+)\s*%?",
176
- interest_policy_text,
177
  flags=re.IGNORECASE
178
  )
179
  interest_rates = {k.capitalize(): float(v) for k, v in rate_matches}
180
 
 
181
  risk_rows = re.findall(
182
  r"(\d{3})\s*(?:-|–|β€”)?\s*(\d{3})\s+(Delinquent|Closed|Good-standing)\s+(High|Medium|Low)",
183
- risk_policy_text,
184
  flags=re.IGNORECASE,
185
  )
186
 
187
  risk_mapping = {}
188
  for lo, hi, status, risk in risk_rows:
189
  band = (int(lo), int(hi))
190
- key = (band, status.strip().lower())
191
- risk_mapping[key] = risk.capitalize()
192
 
193
- return risk_policy_text, interest_policy_text, risk_mapping, interest_rates
194
 
195
 
196
- # =========================================================
197
- # Deterministic rules (same as Step 5)
198
- # =========================================================
199
  def _score_band(score: int):
200
- if 300 <= score <= 674:
201
- return (300, 674)
202
- if 675 <= score <= 749:
203
- return (675, 749)
204
- if 750 <= score <= 850:
205
- return (750, 850)
206
- if score < 300:
207
- return (300, 674)
208
  return (750, 850)
209
 
210
 
211
- def determine_overall_risk(score: int, account_status: str, risk_mapping: dict) -> str:
212
  band = _score_band(score)
213
  status = str(account_status).strip().lower()
214
- key = (band, status)
215
- if key not in risk_mapping:
216
- return "High"
217
- return risk_mapping[key]
218
 
219
 
220
- def determine_interest_rate(overall_risk: str, interest_rates: dict) -> float:
221
- return float(interest_rates[overall_risk])
222
 
223
 
224
- def is_non_singaporean_no_pr(customer_id: str, df_account, df_gov) -> bool:
225
- try:
226
- cid = str(customer_id).strip()
227
- nat_row = df_account[df_account["ID"].astype(str).str.strip() == cid]
228
- nationality = nat_row.iloc[0]["Nationality"] if not nat_row.empty else None
229
 
230
- pr_row = df_gov[df_gov["ID"].astype(str).str.strip() == cid]
231
- pr_status = bool(pr_row.iloc[0]["PR_Status"]) if not pr_row.empty else False
232
 
233
- return (str(nationality).strip().lower() != "singaporean") and (pr_status is False)
234
- except Exception:
235
- return False
236
 
237
 
238
- def apply_mandatory_exception_to_report(report_text: str, customer_id: str, df_account, df_gov) -> str:
239
- if report_text is None:
240
- report_text = ""
241
- text = str(report_text)
242
 
243
- if not is_non_singaporean_no_pr(customer_id, df_account, df_gov):
244
  return text
245
 
246
  text = re.sub(
@@ -264,12 +248,45 @@ def apply_mandatory_exception_to_report(report_text: str, customer_id: str, df_a
264
  return text
265
 
266
 
267
- # =========================================================
268
- # Prompts (same meaning as Step 6)
269
- # =========================================================
270
- qa_prompt_template = """
271
- You are a helpful banking assistant.
272
- Answer the user's question based strictly on the provided Customer Data and Policy Rules.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
  CUSTOMER DATA:
275
  {customer_data}
@@ -282,24 +299,22 @@ USER QUESTION:
282
 
283
  ANSWER:
284
  """
285
- qa_prompt = PromptTemplate(
286
- input_variables=["customer_data", "policy_rules", "question"],
287
- template=qa_prompt_template
288
  )
289
 
290
- advice_prompt_template = """You are a helpful loan officer assistant.
 
 
291
 
292
- Write in THIRD PERSON about the customer.
293
- Always use the customer's Name and possessive.
294
  Never address the reader as 'you' or 'your'.
295
 
296
- You must provide ADVICE/RECOMMENDATION.
297
  Use ONLY the provided Customer Data and Policy Rules.
298
 
299
  REQUIREMENTS:
300
- - Provide 3-5 actionable advice points.
301
- - Include a final recommendation: APPROVE or NOT RECOMMEND / REJECT.
302
- - If customer is Non-Singaporean and PR_Status is False, you MUST recommend NOT RECOMMEND / REJECT.
303
  - Keep it concise.
304
 
305
  CUSTOMER DATA:
@@ -313,17 +328,14 @@ USER QUESTION:
313
 
314
  ANSWER:
315
  """
316
- advice_prompt = PromptTemplate(
317
- input_variables=["customer_data", "policy_rules", "question"],
318
- template=advice_prompt_template
319
  )
320
 
321
- report_prompt_template = """
322
- You are a senior loan officer.
323
- Generate a comprehensive loan assessment report based on the provided customer data and banking policies.
324
- Analyze the customer's profile, determine their overall risk, and calculate the applicable interest rate.
325
- Provide a clear recommendation on whether to approve the loan and justify it with policy rules,
326
- especially for any exceptions.
327
 
328
  CUSTOMER DATA:
329
  {customer_data}
@@ -333,250 +345,200 @@ POLICY RULES:
333
 
334
  REPORT:
335
  """
336
- report_prompt = PromptTemplate(
337
- input_variables=["customer_data", "policy_rules"],
338
- template=report_prompt_template
339
  )
340
 
341
 
342
- # =========================================================
343
- # Spaces init: NO pip installs, NO getpass
344
- # =========================================================
345
- BASE = os.path.dirname(__file__)
346
- CSV_PATH = os.path.join(BASE, "Customer records.csv")
347
- RISK_PDF = os.path.join(BASE, "Risk_Policy.pdf")
348
- INT_PDF = os.path.join(BASE, "Interest_Rate_Policy.pdf")
349
 
350
- RETRIEVER_CACHE = None # build embeddings lazily
351
 
 
 
 
 
 
352
 
353
- def init():
354
- api_key = os.getenv("OPENAI_API_KEY", "").strip()
 
 
355
  if not api_key:
356
  raise RuntimeError("Missing OPENAI_API_KEY. Set it in Space Secrets.")
357
 
358
- missing = [p for p in [CSV_PATH, RISK_PDF, INT_PDF] if not os.path.exists(p)]
359
- if missing:
360
- raise RuntimeError("Missing required files:\n" + "\n".join(missing))
361
 
362
- # Load data
363
- df_all = load_customer_csv(CSV_PATH)
364
- df_credit, df_account, df_gov = build_mock_systems(df_all)
 
365
 
366
- # Load policies + parse
367
- risk_text, interest_text, risk_mapping, interest_rates = load_policies(RISK_PDF, INT_PDF)
368
- policy_full_text = risk_text + "\n\n" + interest_text
369
 
370
  # LLM
371
  model_name = os.getenv("OPENAI_MODEL", "gpt-4o")
372
- llm = ChatOpenAI(model=model_name, temperature=0)
373
 
374
  # Chains (LCEL)
375
- qa_chain = qa_prompt | llm
376
- advice_chain = advice_prompt | llm
377
- report_chain = report_prompt | llm
378
-
379
- # For dropdown convenience
380
- df_names = df_credit[["ID", "Name"]].copy()
381
- df_names["label"] = df_names["Name"] + " (ID " + df_names["ID"].astype(str) + ")"
382
- all_choices = df_names["label"].tolist()
383
-
384
- return (
385
- df_credit, df_account, df_gov,
386
- risk_mapping, interest_rates,
387
- risk_text, interest_text,
388
- policy_full_text,
389
- qa_chain, advice_chain, report_chain,
390
- df_names, all_choices
391
- )
392
-
393
 
394
- try:
395
- (
396
- DF_CREDIT, DF_ACCOUNT, DF_GOV,
397
- RISK_MAP, RATE_MAP,
398
- RISK_TEXT, INT_TEXT,
399
- POLICY_FULL,
400
- QA_CHAIN, ADVICE_CHAIN, REPORT_CHAIN,
401
- DF_NAMES, ALL_CHOICES
402
- ) = init()
403
- INIT_ERROR = None
404
  except Exception as e:
405
  INIT_ERROR = str(e)
406
 
407
 
408
- def _build_retriever_if_needed():
409
- global RETRIEVER_CACHE
410
- if RETRIEVER_CACHE is not None:
411
- return RETRIEVER_CACHE, None
412
 
413
- try:
414
- docs = [
415
- Document(page_content=RISK_TEXT, metadata={"source": "Risk_Policy.pdf"}),
416
- Document(page_content=INT_TEXT, metadata={"source": "Interest_Rate_Policy.pdf"}),
417
- ]
418
- embeddings = OpenAIEmbeddings()
419
- vector_db = FAISS.from_documents(docs, embeddings)
420
- RETRIEVER_CACHE = vector_db.as_retriever()
421
- return RETRIEVER_CACHE, None
422
- except Exception as e:
423
- return None, str(e)
 
424
 
425
 
426
- def _policy_context(use_rag: bool) -> str:
427
  if not use_rag:
428
  return POLICY_FULL
429
 
430
- retriever, err = _build_retriever_if_needed()
431
- if retriever is None:
432
- return POLICY_FULL + f"\n\n[Note] RAG disabled due to embeddings error: {err}"
433
-
434
  try:
435
- relevant_docs = retriever.invoke("risk level interest rate PR status")
436
- ctx = "\n\n".join([d.page_content for d in relevant_docs]).strip()
 
 
 
 
437
  return ctx if ctx else POLICY_FULL
438
  except Exception as e:
439
- return POLICY_FULL + f"\n\n[Note] RAG retrieval failed, using full policy text: {e}"
440
-
441
-
442
- def find_matches(name_or_id: str):
443
- if INIT_ERROR:
444
- return [], f"❌ Initialization error:\n{INIT_ERROR}"
445
-
446
- s = (name_or_id or "").strip()
447
- if not s:
448
- return [], "Type a Name or ID, then click Find."
449
-
450
- # ID
451
- if s.isdigit():
452
- prof = get_customer_profile(s, DF_CREDIT, DF_ACCOUNT, DF_GOV)
453
- if prof:
454
- label = prof["Name"] + " (ID " + str(prof["ID"]) + ")"
455
- return [label], f"βœ… Found ID {s}"
456
- return [], f"❌ ID {s} not found."
457
-
458
- # Name contains
459
- results = DF_CREDIT[DF_CREDIT["Name"].astype(str).str.contains(s, case=False, na=False)]
460
- if results.empty:
461
- return [], f"❌ No customer matched '{s}'."
462
-
463
- if len(results) == 1:
464
- row = results.iloc[0]
465
- label = row["Name"] + " (ID " + str(row["ID"]) + ")"
466
- return [label], f"βœ… Found Name '{row['Name']}'"
467
-
468
- labels = []
469
- for _, r in results.iterrows():
470
- labels.append(r["Name"] + " (ID " + str(r["ID"]) + ")")
471
- return labels, f"⚠️ Multiple matches for '{s}'. Please select one."
472
-
473
-
474
- def _resolve_id_from_label(label: str) -> str:
475
- row = DF_NAMES[DF_NAMES["label"] == label]
476
- if row.empty:
477
- return ""
478
- return str(row.iloc[0]["ID"])
479
 
480
 
481
- def run_action(selected_label: str, action: str, use_rag: bool):
 
 
 
482
  if INIT_ERROR:
483
- return f"❌ Initialization error:\n{INIT_ERROR}"
484
 
485
- cid = _resolve_id_from_label(selected_label)
486
- if not cid:
487
- return "❌ Please select an applicant first."
488
-
489
- profile = get_customer_profile(cid, DF_CREDIT, DF_ACCOUNT, DF_GOV)
490
- if not profile:
491
- return f"❌ Customer ID {cid} not found."
492
-
493
- prof_text = format_customer_profile(profile)
494
- name = profile.get("Name", "Customer")
495
-
496
- policy_context = _policy_context(use_rag)
497
-
498
- # Option 1: Risk & interest
499
- if action == "1) Check Risk & Interest":
500
- question = f"What are the risk level and applicable interest rate for the customer {cid}?"
501
- response = QA_CHAIN.invoke({
502
- "customer_data": prof_text,
503
- "policy_rules": policy_context,
504
- "question": question
505
- })
506
- return one_sentence_per_line(response.content)
507
-
508
- # Option 2: Advice
509
- if action == "2) Advice / Recommendation":
510
- question = f"What interest rate advice can be recommended for customer with Id {cid}?"
511
- response = ADVICE_CHAIN.invoke({
512
- "customer_data": prof_text,
513
- "policy_rules": policy_context,
514
- "question": question
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  })
516
- return one_sentence_per_line(enforce_third_person(response.content, name))
517
-
518
- # Option 3: Full report (+ mandatory exception enforcement)
519
- customer_data_with_exception = prof_text + (
520
- "\n\nMANDATORY EXCEPTION (must follow): Non-Singaporean with PR_Status = False => NOT RECOMMENDED / REJECTED."
521
- if is_non_singaporean_no_pr(cid, DF_ACCOUNT, DF_GOV)
522
- else ""
523
- )
524
 
525
- full_report = REPORT_CHAIN.invoke({
526
- "customer_data": customer_data_with_exception,
527
- "policy_rules": policy_context
528
- })
529
-
530
- report_text = apply_mandatory_exception_to_report(full_report.content, cid, DF_ACCOUNT, DF_GOV)
531
- return one_sentence_per_line(report_text)
532
 
533
 
534
- # =========================================================
535
- # Gradio UI (replaces your input() loop)
536
- # =========================================================
537
  with gr.Blocks(title="Bank Loan Officer System") as demo:
538
- gr.Markdown("# 🏦 BANK LOAN OFFICER SYSTEM (v2.0) β€” Web Demo")
539
- gr.Markdown("Type Applicant Name or ID β†’ Find β†’ Select β†’ Run an action.")
540
 
541
  if INIT_ERROR:
542
  gr.Markdown(f"## ❌ Initialization error\n\n```\n{INIT_ERROR}\n```")
543
 
544
- with gr.Row():
545
- name_or_id = gr.Textbox(label="Applicant Name OR ID", placeholder="e.g. Hilda or 3333")
546
- find_btn = gr.Button("πŸ”Ž Find")
 
547
 
548
- with gr.Row():
549
- matches = gr.Dropdown(
550
- label="Matches (select one)",
551
- choices=[] if INIT_ERROR else ALL_CHOICES,
552
- value=None
553
- )
554
- find_status = gr.Textbox(label="Find status", interactive=False)
555
-
556
- find_btn.click(fn=find_matches, inputs=[name_or_id], outputs=[matches, find_status])
557
-
558
- gr.Markdown("### Actions")
559
- with gr.Row():
560
- action = gr.Radio(
561
- label="Select Option",
562
- choices=[
563
- "1) Check Risk & Interest",
564
- "2) Advice / Recommendation",
565
- "3) FULL Formal Loan Report"
566
- ],
567
- value="1) Check Risk & Interest"
568
- )
569
- use_rag = gr.Checkbox(
570
- label="Use RAG (FAISS embeddings + retrieval). If it fails, auto fallback.",
571
- value=True
572
- )
573
 
574
  run_btn = gr.Button("πŸš€ Run")
575
- output = gr.Textbox(label="Output", lines=24)
 
 
 
576
 
577
- run_btn.click(fn=run_action, inputs=[matches, action, use_rag], outputs=[output])
578
 
579
- demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
 
 
580
  PORT = int(os.environ.get("PORT", 7860))
581
  demo.queue().launch(
582
  server_name="0.0.0.0",
 
1
  import os
2
  import re
3
  import textwrap
4
+ import traceback
5
  import pandas as pd
6
  import gradio as gr
7
 
8
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
  from langchain_core.documents import Document
11
  from langchain_core.prompts import PromptTemplate
 
12
 
13
 
14
+ # =========================
15
+ # Files expected in repo root
16
+ # =========================
17
+ BASE_DIR = os.path.dirname(__file__)
18
+ CSV_FILE = os.path.join(BASE_DIR, "Customer records.csv")
19
+ RISK_PDF = os.path.join(BASE_DIR, "Risk_Policy.pdf")
20
+ INT_PDF = os.path.join(BASE_DIR, "Interest_Rate_Policy.pdf")
21
+
22
+
23
+ # =========================
24
+ # Helpers: formatting
25
+ # =========================
26
  def one_sentence_per_line(text: str, width: int = 110) -> str:
27
  if text is None:
28
  return ""
 
33
  prefix, body = prefix_match.group(1), prefix_match.group(2)
34
  wrapped = textwrap.wrap(body, width=max(20, width - len(prefix))) or [""]
35
  return [prefix + wrapped[0]] + [(" " * len(prefix)) + w for w in wrapped[1:]]
36
+ return textwrap.wrap(line, width=width) or [""]
 
37
 
38
  out_lines = []
39
  for raw_line in str(text).splitlines():
 
44
  parts = re.split(r"(?<=[.!?])\s+", line)
45
  for part in parts:
46
  part = part.strip()
47
+ if part:
48
+ out_lines.extend(_wrap_line(part))
 
 
49
  return "\n".join(out_lines)
50
 
51
 
 
62
  return text
63
 
64
 
65
def format_customer_profile(profile: dict) -> str:
    """Render a customer profile as one ``key: value`` line per field.

    Args:
        profile: Mapping of field name to value. ``None`` or an empty mapping
            yields an empty string. A profile that is already a string is
            returned unchanged (the earlier revision of this function did the
            same; without it ``dict(profile)`` raises on text input).

    Returns:
        The fields in ``preferred`` order first, then any remaining fields in
        sorted key order, joined with newlines.
    """
    if not profile:
        return ""
    if isinstance(profile, str):
        return profile

    fields = dict(profile)  # copy so the caller's mapping is never mutated
    preferred = ["ID", "Name", "Email", "Credit_Score", "Nationality", "Account_Status", "PR_Status"]

    # PR status is not shown for Singaporean customers.
    if str(fields.get("Nationality", "")).strip().lower() == "singaporean":
        fields.pop("PR_Status", None)

    lines = [f"{key}: {fields[key]}" for key in preferred if key in fields]
    lines.extend(f"{key}: {fields[key]}" for key in sorted(fields) if key not in preferred)
    return "\n".join(lines)
84
 
85
 
86
+ # =========================
87
+ # Load CSV + build mock systems
88
+ # =========================
89
  def load_customer_csv(csv_path: str) -> pd.DataFrame:
90
  df_all = pd.read_csv(csv_path)
91
  df_all.columns = [c.strip() for c in df_all.columns]
 
126
  return df_credit, df_account, df_gov
127
 
128
 
129
+ def get_customer_profile(customer_id: str):
130
  customer_id = str(customer_id).strip()
131
+ credit_rec = DF_CREDIT[DF_CREDIT["ID"].astype(str) == customer_id]
 
132
  if credit_rec.empty:
133
  return None
134
 
 
136
  email = credit_rec.iloc[0]["Email"]
137
  credit_score = int(credit_rec.iloc[0]["Credit_Score"])
138
 
139
+ acct_rec = DF_ACCOUNT[DF_ACCOUNT["ID"].astype(str) == customer_id]
140
  nationality = acct_rec.iloc[0]["Nationality"] if not acct_rec.empty else None
141
  account_status = acct_rec.iloc[0]["Account_Status"] if not acct_rec.empty else None
142
 
143
  pr_status = None
144
  if nationality and str(nationality).strip().lower() == "non-singaporean":
145
+ gov_rec = DF_GOV[DF_GOV["ID"].astype(str) == customer_id]
146
  pr_status = bool(gov_rec.iloc[0]["PR_Status"]) if not gov_rec.empty else None
147
 
148
  return {
 
156
  }
157
 
158
 
159
+ # =========================
160
+ # PDF ingest + parse policies
161
+ # =========================
162
  def extract_pdf_text(pdf_path: str) -> str:
163
  from pypdf import PdfReader
164
  reader = PdfReader(pdf_path)
 
168
  return "\n".join(pages)
169
 
170
 
171
def parse_policies(risk_text: str, interest_text: str):
    """Extract the risk table and the interest-rate table from policy text.

    Args:
        risk_text: Text of the risk policy. Rows are expected to look like
            ``<lo> - <hi> <account status> <risk level>`` with three-digit
            score bounds.
        interest_text: Text of the interest-rate policy. Rows are expected to
            look like ``<risk level> <decimal rate>`` with an optional ``%``.

    Returns:
        ``(risk_mapping, interest_rates)`` where ``risk_mapping`` maps
        ``((lo, hi), status_lowercase)`` to a capitalized risk level and
        ``interest_rates`` maps a capitalized risk level to a float rate.
    """
    # Interest rates: only rates written with a decimal point are recognised.
    rate_matches = re.findall(
        r"\b(Low|Medium|High)\b\s+([0-9]+\.[0-9]+)\s*%?",
        interest_text,
        flags=re.IGNORECASE,
    )
    interest_rates = {level.capitalize(): float(rate) for level, rate in rate_matches}

    # Risk table rows. The band separator may be a hyphen, en dash (U+2013) or
    # em dash (U+2014); escapes are used so the pattern cannot be corrupted by
    # a file-encoding mix-up.
    risk_rows = re.findall(
        r"(\d{3})\s*[-\u2013\u2014]?\s*(\d{3})\s+(Delinquent|Closed|Good-standing)\s+(High|Medium|Low)",
        risk_text,
        flags=re.IGNORECASE,
    )
    risk_mapping = {
        ((int(lo), int(hi)), status.strip().lower()): risk.capitalize()
        for lo, hi, status, risk in risk_rows
    }

    return risk_mapping, interest_rates
193
 
194
 
 
 
 
195
  def _score_band(score: int):
196
+ if 300 <= score <= 674: return (300, 674)
197
+ if 675 <= score <= 749: return (675, 749)
198
+ if 750 <= score <= 850: return (750, 850)
199
+ if score < 300: return (300, 674)
 
 
 
 
200
  return (750, 850)
201
 
202
 
203
def determine_overall_risk(score: int, account_status: str) -> str:
    """Look up the overall risk level for a score and account status.

    The key is the score's band from ``_score_band`` paired with the
    stripped, lower-cased account status; it is looked up in the module-level
    ``RISK_MAPPING``.

    Returns:
        The mapped risk level, or ``"High"`` when the combination is absent
        from the mapping.
    """
    lookup_key = (_score_band(score), str(account_status).strip().lower())
    if lookup_key in RISK_MAPPING:
        return RISK_MAPPING[lookup_key]
    return "High"
 
 
 
207
 
208
 
209
def determine_interest_rate(overall_risk: str) -> float:
    """Return the rate stored in the module-level ``INTEREST_RATES`` for a risk level.

    Raises:
        KeyError: if ``overall_risk`` has no entry in ``INTEREST_RATES``.
    """
    rate = INTEREST_RATES[overall_risk]
    return float(rate)
211
 
212
 
213
def _pr_flag_is_true(value) -> bool:
    """Interpret a raw ``PR_Status`` cell as a boolean.

    A plain ``bool(value)`` is wrong for two inputs a CSV can produce: the
    text ``"False"`` (a non-empty string is truthy) and NaN (also truthy).
    Text is compared against common true spellings; missing values count as
    "no PR", matching how a missing government record is treated.
    """
    if isinstance(value, str):
        return value.strip().lower() in ("true", "1", "yes", "y")
    if pd.isna(value):
        return False
    return bool(value)


def is_non_singaporean_no_pr(customer_id: str, df_account=None, df_gov=None) -> bool:
    """Tell whether the customer is not Singaporean and has no PR status.

    Args:
        customer_id: Customer ID; compared as stripped text against the
            ``ID`` column of both frames.
        df_account: Frame with ``ID`` and ``Nationality`` columns. Defaults to
            the module-level ``DF_ACCOUNT``.
        df_gov: Frame with ``ID`` and ``PR_Status`` columns. Defaults to the
            module-level ``DF_GOV``.

    Returns:
        ``True`` when the nationality is anything other than "singaporean"
        (including a customer with no account row) and the PR flag is false
        or missing; ``False`` otherwise.
    """
    accounts = DF_ACCOUNT if df_account is None else df_account
    gov = DF_GOV if df_gov is None else df_gov
    cid = str(customer_id).strip()

    nat_row = accounts[accounts["ID"].astype(str).str.strip() == cid]
    nationality = nat_row.iloc[0]["Nationality"] if not nat_row.empty else None

    pr_row = gov[gov["ID"].astype(str).str.strip() == cid]
    has_pr = _pr_flag_is_true(pr_row.iloc[0]["PR_Status"]) if not pr_row.empty else False

    return (str(nationality).strip().lower() != "singaporean") and not has_pr
 
 
222
 
223
 
224
+ def apply_mandatory_exception_to_report(report_text: str, customer_id: str) -> str:
225
+ text = "" if report_text is None else str(report_text)
 
 
226
 
227
+ if not is_non_singaporean_no_pr(customer_id):
228
  return text
229
 
230
  text = re.sub(
 
248
  return text
249
 
250
 
251
+ # =========================
252
+ # Unstructured resolver (Colab-style)
253
+ # =========================
254
def resolve_customer_id(unstructured_text: str):
    """Resolve free-form user input to a customer ID.

    The first run of three or more digits in the text is treated as an ID.
    If there is none, the whole input is used as a case-insensitive substring
    search over the ``Name`` column of the module-level ``DF_CREDIT``.

    Args:
        unstructured_text: Raw text typed by the user; may be ``None``.

    Returns:
        ``(customer_id, message)``. ``customer_id`` is ``None`` when the input
        is empty, nothing matches, or several names match; ``message`` is a
        user-facing status line in every case.
    """
    query = (unstructured_text or "").strip()
    if not query:
        return None, "❌ Please enter Applicant Name or ID."

    # 1) Extract an ID from any sentence
    id_match = re.search(r"\b(\d{3,})\b", query)
    if id_match:
        cid = id_match.group(1)
        prof = get_customer_profile(cid)
        if prof:
            return cid, f"✅ Found ID {cid}: {prof['Name']}"
        return None, f"❌ No such customer ID: {cid}"

    # 2) Otherwise treat as name search (contains).
    # regex=False: the query is user input, so characters such as "(" or "*"
    # must match literally instead of raising re.error or acting as wildcards.
    name_hits = DF_CREDIT["Name"].astype(str).str.contains(query, case=False, na=False, regex=False)
    results = DF_CREDIT[name_hits]
    if results.empty:
        return None, f"❌ No such customer: '{query}'"

    if len(results) == 1:
        cid = str(results.iloc[0]["ID"])
        nm = str(results.iloc[0]["Name"])
        return cid, f"✅ Found Name '{nm}' -> ID {cid}"

    # Multiple matches: no dropdown, just tell user to type ID
    opts = ", ".join(f"{r['Name']} (ID {r['ID']})" for _, r in results.iterrows())
    return None, f"⚠️ Multiple customers match '{query}'. Please enter ID. Matches: {opts}"
281
+
282
+
283
+ # =========================
284
+ # Prompts / Chains
285
+ # =========================
286
+ QA_PROMPT = PromptTemplate(
287
+ input_variables=["customer_data", "policy_rules", "question"],
288
+ template="""
289
+ You are a helpful banking assistant. Answer the user's question based strictly on the provided Customer Data and Policy Rules.
290
 
291
  CUSTOMER DATA:
292
  {customer_data}
 
299
 
300
  ANSWER:
301
  """
 
 
 
302
  )
303
 
304
+ ADVICE_PROMPT = PromptTemplate(
305
+ input_variables=["customer_data", "policy_rules", "question"],
306
+ template="""You are a helpful loan officer assistant.
307
 
308
+ Write in THIRD PERSON about the customer. Always use the customer's Name and possessive.
 
309
  Never address the reader as 'you' or 'your'.
310
 
311
+ You must provide ADVICE/RECOMMENDATION (not just restating risk and rate).
312
  Use ONLY the provided Customer Data and Policy Rules.
313
 
314
  REQUIREMENTS:
315
+ - Provide 3-5 actionable advice points (short sentences).
316
+ - Include a clear final recommendation: APPROVE or NOT RECOMMEND / REJECT.
317
+ - If customer is Non-Singaporean and PR_Status is False, you MUST recommend NOT RECOMMEND / REJECT regardless of risk level.
318
  - Keep it concise.
319
 
320
  CUSTOMER DATA:
 
328
 
329
  ANSWER:
330
  """
 
 
 
331
  )
332
 
333
+ REPORT_PROMPT = PromptTemplate(
334
+ input_variables=["customer_data", "policy_rules"],
335
+ template="""
336
+ You are a senior loan officer. Generate a comprehensive loan assessment report based on the provided customer data and banking policies.
337
+ Analyze the customer's profile, determine overall risk, calculate interest rate, and provide a clear recommendation.
338
+ Follow any mandatory exceptions.
339
 
340
  CUSTOMER DATA:
341
  {customer_data}
 
345
 
346
  REPORT:
347
  """
 
 
 
348
  )
349
 
350
 
351
+ # =========================
352
+ # Global init (Spaces safe)
353
+ # =========================
354
+ RETRIEVER = None
355
+ POLICY_FULL = ""
356
+ LLM = None
 
357
 
358
+ INIT_ERROR = None
359
 
360
try:
    # Check files: fail early with the list of missing inputs.
    missing = [p for p in [CSV_FILE, RISK_PDF, INT_PDF] if not os.path.exists(p)]
    if missing:
        raise RuntimeError("Missing required files in repo root:\n" + "\n".join(missing))

    # Clean key (fixes illegal header newline) and write the cleaned value
    # back to the environment.
    api_key = os.getenv("OPENAI_API_KEY", "")
    api_key = api_key.strip()
    os.environ["OPENAI_API_KEY"] = api_key
    if not api_key:
        raise RuntimeError("Missing OPENAI_API_KEY. Set it in Space Secrets.")

    # Load CSV
    DF_ALL = load_customer_csv(CSV_FILE)
    DF_CREDIT, DF_ACCOUNT, DF_GOV = build_mock_systems(DF_ALL)

    # Load PDFs
    RISK_TEXT = extract_pdf_text(RISK_PDF)
    INT_TEXT = extract_pdf_text(INT_PDF)
    POLICY_FULL = RISK_TEXT + "\n\n" + INT_TEXT

    # Parse policies
    RISK_MAPPING, INTEREST_RATES = parse_policies(RISK_TEXT, INT_TEXT)

    # LLM
    model_name = os.getenv("OPENAI_MODEL", "gpt-4o")
    LLM = ChatOpenAI(model=model_name, temperature=0, openai_api_key=api_key)

    # Chains (LCEL)
    QA_CHAIN = QA_PROMPT | LLM
    ADVICE_CHAIN = ADVICE_PROMPT | LLM
    REPORT_CHAIN = REPORT_PROMPT | LLM

except Exception as e:
    # INIT_ERROR is tested for truthiness elsewhere in this file. An exception
    # with no message has an empty str(), which would make a failed start-up
    # look successful, so fall back to repr() in that case.
    INIT_ERROR = str(e) or repr(e)
396
 
397
 
398
def build_retriever_if_needed():
    """Return the cached policy retriever, building it on first use.

    The retriever is a FAISS index over two documents (the risk policy text
    and the interest-rate policy text) embedded with ``OpenAIEmbeddings``.
    The result is stored in the module-level ``RETRIEVER`` and reused.

    Returns:
        The retriever, or ``None`` when ``OPENAI_API_KEY`` is unset or blank.
    """
    global RETRIEVER
    if RETRIEVER is None:
        key = os.getenv("OPENAI_API_KEY", "").strip()
        if not key:
            return None

        sources = (
            (RISK_TEXT, "Risk_Policy.pdf"),
            (INT_TEXT, "Interest_Rate_Policy.pdf"),
        )
        documents = [
            Document(page_content=text, metadata={"source": source_name})
            for text, source_name in sources
        ]
        store = FAISS.from_documents(documents, OpenAIEmbeddings(openai_api_key=key))
        RETRIEVER = store.as_retriever()
    return RETRIEVER
415
 
416
 
417
def get_policy_context(use_rag: bool) -> str:
    """Return the policy text to place in a prompt.

    Args:
        use_rag: When false, the full policy text (``POLICY_FULL``) is
            returned. When true, the retriever is queried with a fixed query
            string and the retrieved page contents are joined.

    Returns:
        The retrieved context, or ``POLICY_FULL`` when retrieval yields
        nothing. If the retriever is unavailable or retrieval raises,
        ``POLICY_FULL`` is returned with an explanatory note appended.
    """
    if use_rag:
        try:
            retriever = build_retriever_if_needed()
            if retriever is None:
                return POLICY_FULL + "\n\n[Note] RAG unavailable (missing API key). Using full policy text."

            hits = retriever.invoke("risk level interest rate PR status")
            ctx = "\n\n".join(doc.page_content for doc in hits).strip()
            return ctx or POLICY_FULL
        except Exception as e:
            return POLICY_FULL + f"\n\n[Note] RAG failed, using full policy text: {e}"
    return POLICY_FULL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
 
432
 
433
+ # =========================
434
+ # Main Run (Colab-style)
435
+ # =========================
436
def run_action(user_input: str, action: str, use_rag: bool):
    """Handle one UI request and return the text to display.

    Resolves ``user_input`` to a customer, computes the deterministic risk,
    interest rate and mandatory-exception flag, then runs the chain for the
    selected ``action`` and prefixes its output with the deterministic
    summary. Any action other than the first two produces the full report.

    Args:
        user_input: Free-form applicant name, ID or sentence.
        action: Label of the selected action.
        use_rag: Whether the policy context should come from retrieval.

    Returns:
        The formatted result, the resolver's message when no customer is
        resolved, or an error text (including the traceback) if anything
        raises while running.
    """
    if INIT_ERROR:
        return f"❌ Initialization error:\n\n{INIT_ERROR}"

    try:
        cid, status_msg = resolve_customer_id(user_input)
        if not cid:
            return status_msg

        profile = get_customer_profile(cid)
        if not profile:
            return f"❌ No such customer ID: {cid}"

        profile_text = format_customer_profile(profile)
        policy_context = get_policy_context(use_rag)

        # Deterministic summary (optional but helps demo)
        risk_level = determine_overall_risk(profile["Credit_Score"], profile["Account_Status"])
        rate = determine_interest_rate(risk_level)
        must_reject = is_non_singaporean_no_pr(cid)

        summary_lines = [
            status_msg,
            "",
            "Deterministic (Policy-based):",
            f"- Overall risk: {risk_level}",
            f"- Interest rate: {rate:.3f}%",
        ]
        if must_reject:
            summary_lines.append("- Mandatory exception: NOT RECOMMEND / REJECT (Non-Singaporean without PR).")
        summary = "\n".join(summary_lines)

        if action == "1) Check Risk & Interest":
            qa_inputs = {
                "customer_data": profile_text,
                "policy_rules": policy_context,
                "question": f"What are the risk level and applicable interest rate for the customer {cid}?",
            }
            answer = QA_CHAIN.invoke(qa_inputs).content
            return summary + "\n\nAI Output:\n" + one_sentence_per_line(answer)

        if action == "2) Advice / Recommendation":
            advice_inputs = {
                "customer_data": profile_text,
                "policy_rules": policy_context,
                "question": f"What interest rate advice can be recommended for customer with Id {cid}?",
            }
            advice = ADVICE_CHAIN.invoke(advice_inputs).content
            third_person = enforce_third_person(advice, profile.get("Name", "Customer"))
            return summary + "\n\nAI Output:\n" + one_sentence_per_line(third_person)

        # 3) FULL report
        exception_note = ""
        if must_reject:
            exception_note = (
                "\n\nMANDATORY EXCEPTION (must follow): Non-Singaporean with PR_Status = False => NOT RECOMMENDED / REJECTED."
            )
        report = REPORT_CHAIN.invoke({
            "customer_data": profile_text + exception_note,
            "policy_rules": policy_context,
        })
        final_text = apply_mandatory_exception_to_report(report.content, cid)
        return summary + "\n\nFull Report:\n" + one_sentence_per_line(final_text)

    except Exception:
        return "❌ Run failed:\n\n" + traceback.format_exc()
 
 
 
 
 
500
 
501
 
502
+ # =========================
503
+ # Gradio UI (NO dropdown)
504
+ # =========================
505
# UI layout: one free-text input, an action selector, a RAG toggle and an
# output box. Both the Run button and pressing Enter in the input call
# run_action with the same inputs.
with gr.Blocks(title="Bank Loan Officer System") as demo:
    gr.Markdown("# 🏦 Bank Loan Officer System (Unstructured Input Search)")
    gr.Markdown("Type Applicant **Name / ID / sentence**. The system resolves and responds like your Colab notebook.")

    # Show a start-up failure on the page itself.
    if INIT_ERROR:
        gr.Markdown(f"## ❌ Initialization error\n\n```\n{INIT_ERROR}\n```")

    user_input = gr.Textbox(
        label="Applicant Name or ID (unstructured)",
        placeholder="e.g. 3333 OR Hilda OR 'please check loan for 3333'"
    )

    action = gr.Radio(
        label="Action",
        choices=[
            "1) Check Risk & Interest",
            "2) Advice / Recommendation",
            "3) FULL Formal Loan Report"
        ],
        value="1) Check Risk & Interest"
    )

    use_rag = gr.Checkbox(
        label="Use RAG (FAISS embeddings). If it fails, auto fallback.",
        value=False
    )

    run_btn = gr.Button("🚀 Run")
    output = gr.Textbox(label="Output", lines=28)

    run_btn.click(fn=run_action, inputs=[user_input, action, use_rag], outputs=[output])
    user_input.submit(fn=run_action, inputs=[user_input, action, use_rag], outputs=[output])
537
 
 
538
 
539
+ # =========================
540
+ # HF Spaces launch (the add-on at bottom)
541
+ # =========================
542
  PORT = int(os.environ.get("PORT", 7860))
543
  demo.queue().launch(
544
  server_name="0.0.0.0",