larrysim committed on
Commit
48992c5
·
verified ·
1 Parent(s): 06f8c36

Update app.py

Browse files

fix the pdf missing error

Files changed (1) hide show
  1. app.py +25 -32
app.py CHANGED
@@ -15,8 +15,12 @@ st.set_page_config(page_title="Bank Loan Agent (SQL)", layout="wide")
15
  warnings.filterwarnings("ignore")
16
 
17
  # ==========================================
18
- # 2. ROBUST IMPORTS
19
  # ==========================================
 
 
 
 
20
  try:
21
  from langchain_groq import ChatGroq
22
  from langchain_huggingface import HuggingFaceEmbeddings
@@ -37,17 +41,12 @@ except ImportError as e:
37
  # ==========================================
38
  # 3. DATABASE SETUP
39
  # ==========================================
40
- DB_FILE = "bank.db"
41
- INDEX_PATH = "faiss_index"
42
-
43
  def init_db():
44
- """Converts CSV files to SQLite DB. Handles 'replace' errors gracefully."""
45
- # Only run if DB doesn't exist to avoid redundant overwrites
46
  if os.path.exists(DB_FILE):
47
  return
48
 
49
  conn = sqlite3.connect(DB_FILE)
50
-
51
  csv_files = {
52
  "credit_score": "credit_score.csv",
53
  "account_status": "account_status.csv",
@@ -58,27 +57,23 @@ def init_db():
58
  for table, file in csv_files.items():
59
  if os.path.exists(file):
60
  df = pd.read_csv(file)
61
- df.columns = [c.strip() for c in df.columns] # Clean headers
62
  if 'ID' in df.columns:
63
  df['ID'] = df['ID'].astype(str)
64
 
65
- # Robust SQL Write
66
  try:
67
  df.to_sql(table, conn, if_exists='replace', index=False)
68
- except Exception as sql_err:
69
- # Fallback: if 'replace' fails on missing table, try creating it fresh
70
- print(f"⚠️ SQL Warning for {table}: {sql_err}")
71
  pass
72
-
73
  except Exception as e:
74
  st.error(f"DB Init Error: {e}")
75
  finally:
76
  conn.close()
77
 
78
- # Initialize DB
79
  init_db()
80
 
81
- # Helper for tools
82
  def run_query(query, params=()):
83
  try:
84
  with sqlite3.connect(DB_FILE) as conn:
@@ -119,7 +114,6 @@ def check_pr_status(user_id: str) -> str:
119
  clean_id = ''.join(filter(str.isdigit, str(user_id)))
120
  row = run_query("SELECT PR_Status FROM pr_status WHERE ID = ?", (clean_id,))
121
 
122
- # Fallback for column naming differences
123
  if not row or (isinstance(row, str) and "no such column" in row.lower()):
124
  row = run_query("SELECT Is_PR FROM pr_status WHERE ID = ?", (clean_id,))
125
 
@@ -133,6 +127,9 @@ def check_pr_status(user_id: str) -> str:
133
  st.title("🤖 Multi-Policy Loan Assessor (SQL + RAG)")
134
  st.markdown("Agent connects to **SQLite Database** and **Persistent Vector Store**")
135
 
 
 
 
136
  # --- METRICS FUNCTION ---
137
  def update_metrics(placeholder):
138
  manual_time = 15 * 60
@@ -140,26 +137,21 @@ def update_metrics(placeholder):
140
  ai_time = st.session_state.execution_time
141
  time_saved = manual_time - ai_time
142
  saved_pct = (time_saved / manual_time) * 100
143
-
144
  with placeholder.container():
145
  col_kpi1, col_kpi2 = st.columns(2)
146
  col_kpi1.metric("AI Processing", f"{ai_time:.1f}s")
147
- col_kpi2.metric(
148
- "Time Saved",
149
- f"{time_saved/60:.1f} min",
150
- delta=f"{saved_pct:.1f}% faster"
151
- )
152
 
153
  # --- SIDEBAR ---
154
  with st.sidebar:
155
  st.header("🔐 Authentication")
156
 
157
- # 1. Check if Key exists in Secrets (Env Var)
158
  if "GROQ_API_KEY" in st.secrets:
159
  st.session_state['groq_api_key'] = st.secrets["GROQ_API_KEY"]
160
  st.session_state['is_key_valid'] = True
161
 
162
- # 2. Manual Entry Logic
163
  if 'is_key_valid' not in st.session_state:
164
  st.session_state['is_key_valid'] = False
165
 
@@ -206,9 +198,6 @@ with st.sidebar:
206
 
207
  st.divider()
208
 
209
- required_pdfs = ["Bank Loan Overall Risk Policy.pdf", "Bank Loan Interest Rate Policy.pdf"]
210
- pdfs_missing = [f for f in required_pdfs if not os.path.exists(f)]
211
-
212
  if os.path.exists(DB_FILE) and not pdfs_missing:
213
  st.success("✅ System Ready")
214
  else:
@@ -226,19 +215,24 @@ if st.session_state.get('is_key_valid', False):
226
  # --- RAG SETUP ---
227
  @st.cache_resource
228
  def setup_rag():
 
 
 
 
 
229
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
230
  if os.path.exists(INDEX_PATH):
231
  return FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True).as_retriever()
232
  else:
233
- if pdfs_missing:
234
- st.error("Missing PDFs.")
235
- st.stop()
236
  documents = []
237
- for pdf_file in required_pdfs:
238
  loader = PyPDFLoader(pdf_file)
239
  documents.extend(loader.load())
 
240
  text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=50)
241
  final_docs = text_splitter.split_documents(documents)
 
242
  vectorstore = FAISS.from_documents(final_docs, embeddings)
243
  vectorstore.save_local(INDEX_PATH)
244
  return vectorstore.as_retriever()
@@ -248,7 +242,6 @@ if st.session_state.get('is_key_valid', False):
248
 
249
  llm = ChatGroq(temperature=0, model_name="llama-3.3-70b-versatile")
250
 
251
- # RAG Chain
252
  rag_prompt = ChatPromptTemplate.from_template("Answer based on context:\n{context}\nQuestion: {question}")
253
  rag_chain = (
254
  {"context": retriever | (lambda d: "\n".join([x.page_content for x in d])), "question": RunnablePassthrough()}
 
15
  warnings.filterwarnings("ignore")
16
 
17
  # ==========================================
18
+ # 2. GLOBAL CONSTANTS & IMPORTS
19
  # ==========================================
20
+ DB_FILE = "bank.db"
21
+ INDEX_PATH = "faiss_index"
22
+ REQUIRED_PDFS = ["Bank Loan Overall Risk Policy.pdf", "Bank Loan Interest Rate Policy.pdf"]
23
+
24
  try:
25
  from langchain_groq import ChatGroq
26
  from langchain_huggingface import HuggingFaceEmbeddings
 
41
  # ==========================================
42
  # 3. DATABASE SETUP
43
  # ==========================================
 
 
 
44
  def init_db():
45
+ """Converts CSV files to SQLite DB. Handles errors gracefully."""
 
46
  if os.path.exists(DB_FILE):
47
  return
48
 
49
  conn = sqlite3.connect(DB_FILE)
 
50
  csv_files = {
51
  "credit_score": "credit_score.csv",
52
  "account_status": "account_status.csv",
 
57
  for table, file in csv_files.items():
58
  if os.path.exists(file):
59
  df = pd.read_csv(file)
60
+ df.columns = [c.strip() for c in df.columns]
61
  if 'ID' in df.columns:
62
  df['ID'] = df['ID'].astype(str)
63
 
 
64
  try:
65
  df.to_sql(table, conn, if_exists='replace', index=False)
66
+ except Exception:
 
 
67
  pass
 
68
  except Exception as e:
69
  st.error(f"DB Init Error: {e}")
70
  finally:
71
  conn.close()
72
 
73
+ # Initialize DB on startup
74
  init_db()
75
 
76
+ # Helper for SQL tools
77
  def run_query(query, params=()):
78
  try:
79
  with sqlite3.connect(DB_FILE) as conn:
 
114
  clean_id = ''.join(filter(str.isdigit, str(user_id)))
115
  row = run_query("SELECT PR_Status FROM pr_status WHERE ID = ?", (clean_id,))
116
 
 
117
  if not row or (isinstance(row, str) and "no such column" in row.lower()):
118
  row = run_query("SELECT Is_PR FROM pr_status WHERE ID = ?", (clean_id,))
119
 
 
127
  st.title("🤖 Multi-Policy Loan Assessor (SQL + RAG)")
128
  st.markdown("Agent connects to **SQLite Database** and **Persistent Vector Store**")
129
 
130
+ # Calculate missing PDFs globally so everyone can see it
131
+ pdfs_missing = [f for f in REQUIRED_PDFS if not os.path.exists(f)]
132
+
133
  # --- METRICS FUNCTION ---
134
  def update_metrics(placeholder):
135
  manual_time = 15 * 60
 
137
  ai_time = st.session_state.execution_time
138
  time_saved = manual_time - ai_time
139
  saved_pct = (time_saved / manual_time) * 100
 
140
  with placeholder.container():
141
  col_kpi1, col_kpi2 = st.columns(2)
142
  col_kpi1.metric("AI Processing", f"{ai_time:.1f}s")
143
+ col_kpi2.metric("Time Saved", f"{time_saved/60:.1f} min", delta=f"{saved_pct:.1f}% faster")
 
 
 
 
144
 
145
  # --- SIDEBAR ---
146
  with st.sidebar:
147
  st.header("🔐 Authentication")
148
 
149
+ # Check Secrets
150
  if "GROQ_API_KEY" in st.secrets:
151
  st.session_state['groq_api_key'] = st.secrets["GROQ_API_KEY"]
152
  st.session_state['is_key_valid'] = True
153
 
154
+ # Manual Entry
155
  if 'is_key_valid' not in st.session_state:
156
  st.session_state['is_key_valid'] = False
157
 
 
198
 
199
  st.divider()
200
 
 
 
 
201
  if os.path.exists(DB_FILE) and not pdfs_missing:
202
  st.success("✅ System Ready")
203
  else:
 
215
  # --- RAG SETUP ---
216
  @st.cache_resource
217
  def setup_rag():
218
+ # Check global variable here
219
+ if pdfs_missing:
220
+ st.error(f"Missing PDFs: {pdfs_missing}")
221
+ st.stop()
222
+
223
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
224
+
225
  if os.path.exists(INDEX_PATH):
226
  return FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True).as_retriever()
227
  else:
 
 
 
228
  documents = []
229
+ for pdf_file in REQUIRED_PDFS:
230
  loader = PyPDFLoader(pdf_file)
231
  documents.extend(loader.load())
232
+
233
  text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=50)
234
  final_docs = text_splitter.split_documents(documents)
235
+
236
  vectorstore = FAISS.from_documents(final_docs, embeddings)
237
  vectorstore.save_local(INDEX_PATH)
238
  return vectorstore.as_retriever()
 
242
 
243
  llm = ChatGroq(temperature=0, model_name="llama-3.3-70b-versatile")
244
 
 
245
  rag_prompt = ChatPromptTemplate.from_template("Answer based on context:\n{context}\nQuestion: {question}")
246
  rag_chain = (
247
  {"context": retriever | (lambda d: "\n".join([x.page_content for x in d])), "question": RunnablePassthrough()}