Spaces:
Sleeping
Sleeping
Update app.py
Browse files
fix the pdf missing error
app.py
CHANGED
|
@@ -15,8 +15,12 @@ st.set_page_config(page_title="Bank Loan Agent (SQL)", layout="wide")
|
|
| 15 |
warnings.filterwarnings("ignore")
|
| 16 |
|
| 17 |
# ==========================================
|
| 18 |
-
# 2.
|
| 19 |
# ==========================================
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
try:
|
| 21 |
from langchain_groq import ChatGroq
|
| 22 |
from langchain_huggingface import HuggingFaceEmbeddings
|
|
@@ -37,17 +41,12 @@ except ImportError as e:
|
|
| 37 |
# ==========================================
|
| 38 |
# 3. DATABASE SETUP
|
| 39 |
# ==========================================
|
| 40 |
-
DB_FILE = "bank.db"
|
| 41 |
-
INDEX_PATH = "faiss_index"
|
| 42 |
-
|
| 43 |
def init_db():
|
| 44 |
-
"""Converts CSV files to SQLite DB. Handles
|
| 45 |
-
# Only run if DB doesn't exist to avoid redundant overwrites
|
| 46 |
if os.path.exists(DB_FILE):
|
| 47 |
return
|
| 48 |
|
| 49 |
conn = sqlite3.connect(DB_FILE)
|
| 50 |
-
|
| 51 |
csv_files = {
|
| 52 |
"credit_score": "credit_score.csv",
|
| 53 |
"account_status": "account_status.csv",
|
|
@@ -58,27 +57,23 @@ def init_db():
|
|
| 58 |
for table, file in csv_files.items():
|
| 59 |
if os.path.exists(file):
|
| 60 |
df = pd.read_csv(file)
|
| 61 |
-
df.columns = [c.strip() for c in df.columns]
|
| 62 |
if 'ID' in df.columns:
|
| 63 |
df['ID'] = df['ID'].astype(str)
|
| 64 |
|
| 65 |
-
# Robust SQL Write
|
| 66 |
try:
|
| 67 |
df.to_sql(table, conn, if_exists='replace', index=False)
|
| 68 |
-
except Exception as sql_err:
|
| 69 |
-
# Fallback: if 'replace' fails on missing table, try creating it fresh
|
| 70 |
-
print(f"⚠️ SQL Warning for {table}: {sql_err}")
|
| 71 |
pass
|
| 72 |
-
|
| 73 |
except Exception as e:
|
| 74 |
st.error(f"DB Init Error: {e}")
|
| 75 |
finally:
|
| 76 |
conn.close()
|
| 77 |
|
| 78 |
-
# Initialize DB
|
| 79 |
init_db()
|
| 80 |
|
| 81 |
-
# Helper for tools
|
| 82 |
def run_query(query, params=()):
|
| 83 |
try:
|
| 84 |
with sqlite3.connect(DB_FILE) as conn:
|
|
@@ -119,7 +114,6 @@ def check_pr_status(user_id: str) -> str:
|
|
| 119 |
clean_id = ''.join(filter(str.isdigit, str(user_id)))
|
| 120 |
row = run_query("SELECT PR_Status FROM pr_status WHERE ID = ?", (clean_id,))
|
| 121 |
|
| 122 |
-
# Fallback for column naming differences
|
| 123 |
if not row or (isinstance(row, str) and "no such column" in row.lower()):
|
| 124 |
row = run_query("SELECT Is_PR FROM pr_status WHERE ID = ?", (clean_id,))
|
| 125 |
|
|
@@ -133,6 +127,9 @@ def check_pr_status(user_id: str) -> str:
|
|
| 133 |
st.title("🤖 Multi-Policy Loan Assessor (SQL + RAG)")
|
| 134 |
st.markdown("Agent connects to **SQLite Database** and **Persistent Vector Store**")
|
| 135 |
|
|
|
|
|
|
|
|
|
|
| 136 |
# --- METRICS FUNCTION ---
|
| 137 |
def update_metrics(placeholder):
|
| 138 |
manual_time = 15 * 60
|
|
@@ -140,26 +137,21 @@ def update_metrics(placeholder):
|
|
| 140 |
ai_time = st.session_state.execution_time
|
| 141 |
time_saved = manual_time - ai_time
|
| 142 |
saved_pct = (time_saved / manual_time) * 100
|
| 143 |
-
|
| 144 |
with placeholder.container():
|
| 145 |
col_kpi1, col_kpi2 = st.columns(2)
|
| 146 |
col_kpi1.metric("AI Processing", f"{ai_time:.1f}s")
|
| 147 |
-
col_kpi2.metric(
|
| 148 |
-
"Time Saved",
|
| 149 |
-
f"{time_saved/60:.1f} min",
|
| 150 |
-
delta=f"{saved_pct:.1f}% faster"
|
| 151 |
-
)
|
| 152 |
|
| 153 |
# --- SIDEBAR ---
|
| 154 |
with st.sidebar:
|
| 155 |
st.header("🔐 Authentication")
|
| 156 |
|
| 157 |
-
#
|
| 158 |
if "GROQ_API_KEY" in st.secrets:
|
| 159 |
st.session_state['groq_api_key'] = st.secrets["GROQ_API_KEY"]
|
| 160 |
st.session_state['is_key_valid'] = True
|
| 161 |
|
| 162 |
-
#
|
| 163 |
if 'is_key_valid' not in st.session_state:
|
| 164 |
st.session_state['is_key_valid'] = False
|
| 165 |
|
|
@@ -206,9 +198,6 @@ with st.sidebar:
|
|
| 206 |
|
| 207 |
st.divider()
|
| 208 |
|
| 209 |
-
required_pdfs = ["Bank Loan Overall Risk Policy.pdf", "Bank Loan Interest Rate Policy.pdf"]
|
| 210 |
-
pdfs_missing = [f for f in required_pdfs if not os.path.exists(f)]
|
| 211 |
-
|
| 212 |
if os.path.exists(DB_FILE) and not pdfs_missing:
|
| 213 |
st.success("✅ System Ready")
|
| 214 |
else:
|
|
@@ -226,19 +215,24 @@ if st.session_state.get('is_key_valid', False):
|
|
| 226 |
# --- RAG SETUP ---
|
| 227 |
@st.cache_resource
|
| 228 |
def setup_rag():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
|
|
|
| 230 |
if os.path.exists(INDEX_PATH):
|
| 231 |
return FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True).as_retriever()
|
| 232 |
else:
|
| 233 |
-
if pdfs_missing:
|
| 234 |
-
st.error("Missing PDFs.")
|
| 235 |
-
st.stop()
|
| 236 |
documents = []
|
| 237 |
-
for pdf_file in required_pdfs:
|
| 238 |
loader = PyPDFLoader(pdf_file)
|
| 239 |
documents.extend(loader.load())
|
|
|
|
| 240 |
text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=50)
|
| 241 |
final_docs = text_splitter.split_documents(documents)
|
|
|
|
| 242 |
vectorstore = FAISS.from_documents(final_docs, embeddings)
|
| 243 |
vectorstore.save_local(INDEX_PATH)
|
| 244 |
return vectorstore.as_retriever()
|
|
@@ -248,7 +242,6 @@ if st.session_state.get('is_key_valid', False):
|
|
| 248 |
|
| 249 |
llm = ChatGroq(temperature=0, model_name="llama-3.3-70b-versatile")
|
| 250 |
|
| 251 |
-
# RAG Chain
|
| 252 |
rag_prompt = ChatPromptTemplate.from_template("Answer based on context:\n{context}\nQuestion: {question}")
|
| 253 |
rag_chain = (
|
| 254 |
{"context": retriever | (lambda d: "\n".join([x.page_content for x in d])), "question": RunnablePassthrough()}
|
|
|
|
| 15 |
warnings.filterwarnings("ignore")
|
| 16 |
|
| 17 |
# ==========================================
|
| 18 |
+
# 2. GLOBAL CONSTANTS & IMPORTS
|
| 19 |
# ==========================================
|
| 20 |
+
DB_FILE = "bank.db"
|
| 21 |
+
INDEX_PATH = "faiss_index"
|
| 22 |
+
REQUIRED_PDFS = ["Bank Loan Overall Risk Policy.pdf", "Bank Loan Interest Rate Policy.pdf"]
|
| 23 |
+
|
| 24 |
try:
|
| 25 |
from langchain_groq import ChatGroq
|
| 26 |
from langchain_huggingface import HuggingFaceEmbeddings
|
|
|
|
| 41 |
# ==========================================
|
| 42 |
# 3. DATABASE SETUP
|
| 43 |
# ==========================================
|
|
|
|
|
|
|
|
|
|
| 44 |
def init_db():
|
| 45 |
+
"""Converts CSV files to SQLite DB. Handles errors gracefully."""
|
|
|
|
| 46 |
if os.path.exists(DB_FILE):
|
| 47 |
return
|
| 48 |
|
| 49 |
conn = sqlite3.connect(DB_FILE)
|
|
|
|
| 50 |
csv_files = {
|
| 51 |
"credit_score": "credit_score.csv",
|
| 52 |
"account_status": "account_status.csv",
|
|
|
|
| 57 |
for table, file in csv_files.items():
|
| 58 |
if os.path.exists(file):
|
| 59 |
df = pd.read_csv(file)
|
| 60 |
+
df.columns = [c.strip() for c in df.columns]
|
| 61 |
if 'ID' in df.columns:
|
| 62 |
df['ID'] = df['ID'].astype(str)
|
| 63 |
|
|
|
|
| 64 |
try:
|
| 65 |
df.to_sql(table, conn, if_exists='replace', index=False)
|
| 66 |
+
except Exception:
|
|
|
|
|
|
|
| 67 |
pass
|
|
|
|
| 68 |
except Exception as e:
|
| 69 |
st.error(f"DB Init Error: {e}")
|
| 70 |
finally:
|
| 71 |
conn.close()
|
| 72 |
|
| 73 |
+
# Initialize DB on startup
|
| 74 |
init_db()
|
| 75 |
|
| 76 |
+
# Helper for SQL tools
|
| 77 |
def run_query(query, params=()):
|
| 78 |
try:
|
| 79 |
with sqlite3.connect(DB_FILE) as conn:
|
|
|
|
| 114 |
clean_id = ''.join(filter(str.isdigit, str(user_id)))
|
| 115 |
row = run_query("SELECT PR_Status FROM pr_status WHERE ID = ?", (clean_id,))
|
| 116 |
|
|
|
|
| 117 |
if not row or (isinstance(row, str) and "no such column" in row.lower()):
|
| 118 |
row = run_query("SELECT Is_PR FROM pr_status WHERE ID = ?", (clean_id,))
|
| 119 |
|
|
|
|
| 127 |
st.title("🤖 Multi-Policy Loan Assessor (SQL + RAG)")
|
| 128 |
st.markdown("Agent connects to **SQLite Database** and **Persistent Vector Store**")
|
| 129 |
|
| 130 |
+
# Calculate missing PDFs globally so everyone can see it
|
| 131 |
+
pdfs_missing = [f for f in REQUIRED_PDFS if not os.path.exists(f)]
|
| 132 |
+
|
| 133 |
# --- METRICS FUNCTION ---
|
| 134 |
def update_metrics(placeholder):
|
| 135 |
manual_time = 15 * 60
|
|
|
|
| 137 |
ai_time = st.session_state.execution_time
|
| 138 |
time_saved = manual_time - ai_time
|
| 139 |
saved_pct = (time_saved / manual_time) * 100
|
|
|
|
| 140 |
with placeholder.container():
|
| 141 |
col_kpi1, col_kpi2 = st.columns(2)
|
| 142 |
col_kpi1.metric("AI Processing", f"{ai_time:.1f}s")
|
| 143 |
+
col_kpi2.metric("Time Saved", f"{time_saved/60:.1f} min", delta=f"{saved_pct:.1f}% faster")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
# --- SIDEBAR ---
|
| 146 |
with st.sidebar:
|
| 147 |
st.header("🔐 Authentication")
|
| 148 |
|
| 149 |
+
# Check Secrets
|
| 150 |
if "GROQ_API_KEY" in st.secrets:
|
| 151 |
st.session_state['groq_api_key'] = st.secrets["GROQ_API_KEY"]
|
| 152 |
st.session_state['is_key_valid'] = True
|
| 153 |
|
| 154 |
+
# Manual Entry
|
| 155 |
if 'is_key_valid' not in st.session_state:
|
| 156 |
st.session_state['is_key_valid'] = False
|
| 157 |
|
|
|
|
| 198 |
|
| 199 |
st.divider()
|
| 200 |
|
|
|
|
|
|
|
|
|
|
| 201 |
if os.path.exists(DB_FILE) and not pdfs_missing:
|
| 202 |
st.success("✅ System Ready")
|
| 203 |
else:
|
|
|
|
| 215 |
# --- RAG SETUP ---
|
| 216 |
@st.cache_resource
|
| 217 |
def setup_rag():
|
| 218 |
+
# Check global variable here
|
| 219 |
+
if pdfs_missing:
|
| 220 |
+
st.error(f"Missing PDFs: {pdfs_missing}")
|
| 221 |
+
st.stop()
|
| 222 |
+
|
| 223 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
| 224 |
+
|
| 225 |
if os.path.exists(INDEX_PATH):
|
| 226 |
return FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True).as_retriever()
|
| 227 |
else:
|
|
|
|
|
|
|
|
|
|
| 228 |
documents = []
|
| 229 |
+
for pdf_file in REQUIRED_PDFS:
|
| 230 |
loader = PyPDFLoader(pdf_file)
|
| 231 |
documents.extend(loader.load())
|
| 232 |
+
|
| 233 |
text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=50)
|
| 234 |
final_docs = text_splitter.split_documents(documents)
|
| 235 |
+
|
| 236 |
vectorstore = FAISS.from_documents(final_docs, embeddings)
|
| 237 |
vectorstore.save_local(INDEX_PATH)
|
| 238 |
return vectorstore.as_retriever()
|
|
|
|
| 242 |
|
| 243 |
llm = ChatGroq(temperature=0, model_name="llama-3.3-70b-versatile")
|
| 244 |
|
|
|
|
| 245 |
rag_prompt = ChatPromptTemplate.from_template("Answer based on context:\n{context}\nQuestion: {question}")
|
| 246 |
rag_chain = (
|
| 247 |
{"context": retriever | (lambda d: "\n".join([x.page_content for x in d])), "question": RunnablePassthrough()}
|