Sush committed on
Commit ·
c62ce64
0
Parent(s):
Initial commit: Banking Intelligence Assistant
Browse files- .env.example +2 -0
- .github/workflows/ci-cd.yml +0 -0
- .gitignore +18 -0
- Dockerfile +0 -0
- README.md +0 -0
- agents/__init__.py +0 -0
- agents/orchestrator.py +61 -0
- agents/rag_agent.py +70 -0
- agents/sql_agent.py +86 -0
- app.py +90 -0
- data/database/banking.db +0 -0
- ingest.py +71 -0
- notebooks/prototype.ipynb +0 -0
- requirements.txt +11 -0
.env.example
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 2 |
+
HUGGINGFACE_TOKEN=your_huggingface_token_here
|
.github/workflows/ci-cd.yml
ADDED
|
File without changes
|
.gitignore
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment
|
| 2 |
+
.env
|
| 3 |
+
__pycache__/
|
| 4 |
+
*.pyc
|
| 5 |
+
*.pyo
|
| 6 |
+
|
| 7 |
+
# Vectorstore (too large for git)
|
| 8 |
+
vectorstore/
|
| 9 |
+
|
| 10 |
+
# Jupyter checkpoints
|
| 11 |
+
.ipynb_checkpoints/
|
| 12 |
+
|
| 13 |
+
# Mac system files
|
| 14 |
+
.DS_Store
|
| 15 |
+
|
| 16 |
+
# Virtual environment
|
| 17 |
+
venv/
|
| 18 |
+
.venv/
|
Dockerfile
ADDED
|
File without changes
|
README.md
ADDED
|
File without changes
|
agents/__init__.py
ADDED
|
File without changes
|
agents/orchestrator.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langgraph.graph import StateGraph, END
|
| 2 |
+
from typing import TypedDict
|
| 3 |
+
|
| 4 |
+
class AgentState(TypedDict):
    """State dict carried through the orchestrator graph.

    query: the user's raw question (set by the caller).
    agent_used: routing decision, "sql" or "rag" (set by the router node).
    response: the final answer text (set by whichever agent node ran).
    """

    query: str
    agent_used: str
    response: str
|
| 8 |
+
|
| 9 |
+
def build_orchestrator(rag_chain, sql_agent):
    """Build the LangGraph orchestrator that routes between RAG and SQL agents.

    Args:
        rag_chain: runnable whose ``.invoke(question_str)`` returns an answer
            string (policy / document questions).
        sql_agent: object whose ``.invoke({"input": question_str})`` returns
            ``{"output": answer_str}`` (database questions).

    Returns:
        A compiled graph; ``.invoke({"query": ..., "agent_used": "", "response": ""})``
        fills in ``agent_used`` ("rag" or "sql") and ``response``.
    """

    sql_keywords = [
        "transaction", "balance", "how many", "outstanding",
        "credit card", "merchant", "customer", "branch",
        "average", "total", "count", "highest", "lowest",
        "failed", "blocked", "overdue", "statement"
    ]
    rag_keywords = [
        "policy", "rule", "guideline", "what is", "how does",
        "eligibility", "penalty", "interest rate", "fee",
        "grievance", "kyc", "document", "complaint", "process",
        "minimum balance", "loan", "terms", "conditions"
    ]

    def _score(query: str, keywords) -> int:
        """Count keyword hits in a lowercased query.

        Multi-word phrases match as plain substrings. Single words must
        match at a token boundary (prefix match, so "transaction" still
        hits "transactions"). This fixes the bug where e.g. "count"
        matched inside "account" and skewed routing toward SQL.
        """
        tokens = [tok.strip(".,;:!?()'\"") for tok in query.split()]
        hits = 0
        for kw in keywords:
            if " " in kw:
                if kw in query:
                    hits += 1
            elif any(tok.startswith(kw) for tok in tokens):
                hits += 1
        return hits

    # Router node: pick an agent by keyword scoring; ties default to RAG.
    def router(state: AgentState) -> AgentState:
        query = state["query"].lower()
        sql_score = _score(query, sql_keywords)
        rag_score = _score(query, rag_keywords)
        state["agent_used"] = "sql" if sql_score > rag_score else "rag"
        return state

    # RAG node: answer from policy documents.
    def run_rag_agent(state: AgentState) -> AgentState:
        state["response"] = rag_chain.invoke(state["query"])
        return state

    # SQL node: answer from the banking database.
    def run_sql_agent(state: AgentState) -> AgentState:
        result = sql_agent.invoke({"input": state["query"]})
        state["response"] = result["output"]
        return state

    # Conditional-edge selector: reads the router's decision.
    def route_to_agent(state: AgentState) -> str:
        return state["agent_used"]

    # Build graph: router -> (rag | sql) -> END
    workflow = StateGraph(AgentState)
    workflow.add_node("router", router)
    workflow.add_node("rag", run_rag_agent)
    workflow.add_node("sql", run_sql_agent)
    workflow.set_entry_point("router")
    workflow.add_conditional_edges(
        "router", route_to_agent, {"rag": "rag", "sql": "sql"}
    )
    workflow.add_edge("rag", END)
    workflow.add_edge("sql", END)

    return workflow.compile()
|
agents/rag_agent.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from langchain_groq import ChatGroq
|
| 3 |
+
from langchain_community.vectorstores import FAISS
|
| 4 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 5 |
+
from langchain_core.prompts import PromptTemplate
|
| 6 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 7 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 8 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
def load_rag_agent(vectorstore_path: str = "vectorstore/"):
    """Build the policy RAG chain on top of a saved FAISS vectorstore.

    Args:
        vectorstore_path: directory containing the persisted FAISS index.

    Returns:
        An LCEL runnable: ``.invoke(question_str)`` returns an answer string
        grounded in the retrieved policy context.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # The index was written by our own ingest step, so deserializing it is
    # an accepted risk here.
    faiss_index = FAISS.load_local(
        vectorstore_path,
        embedding_model,
        allow_dangerous_deserialization=True,
    )

    # Maximal-marginal-relevance retrieval: pick a diverse top-4 out of
    # 20 candidates (lambda_mult balances relevance vs. diversity).
    mmr_retriever = faiss_index.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 4, "fetch_k": 20, "lambda_mult": 0.7},
    )

    chat_model = ChatGroq(
        api_key=os.getenv("GROQ_API_KEY"),
        model_name="llama-3.1-8b-instant",
        temperature=0,
    )

    # Grounded prompt: the model must answer only from retrieved context.
    grounded_prompt = PromptTemplate(
        template="""You are a helpful HDFC Bank policy assistant.
Use ONLY the context below to answer the customer's question.
If the answer is not in the context, say "I don't have enough information
in the policy documents to answer this. Please contact HDFC Bank directly."

Context:
{context}

Customer Question: {question}

Answer:""",
        input_variables=["context", "question"],
    )

    def _join_docs(docs):
        # Concatenate retrieved chunks into one context string.
        return "\n\n".join(d.page_content for d in docs)

    # LCEL pipeline: retrieve -> format -> prompt -> LLM -> plain string.
    return (
        {"context": mmr_retriever | _join_docs, "question": RunnablePassthrough()}
        | grounded_prompt
        | chat_model
        | StrOutputParser()
    )
|
agents/sql_agent.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from langchain_groq import ChatGroq
|
| 3 |
+
from langchain_community.utilities import SQLDatabase
|
| 4 |
+
from langchain_core.prompts import PromptTemplate
|
| 5 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 6 |
+
from langchain_core.runnables import RunnableLambda
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
def load_sql_agent(db_path: str = "data/database/banking.db"):
    """Custom SQL chain using LCEL — more reliable than agent for Llama models.

    Args:
        db_path: path to the SQLite banking database.

    Returns:
        An object exposing ``.invoke({"input": question_str})`` ->
        ``{"output": answer_str}``, the interface the orchestrator expects.
    """
    database = SQLDatabase.from_uri(f"sqlite:///{db_path}")

    model = ChatGroq(
        api_key=os.getenv("GROQ_API_KEY"),
        model_name="llama-3.3-70b-versatile",
        temperature=0,
    )

    # Prompt 1 — turn a natural-language question into a raw SQLite query.
    generate_sql_prompt = PromptTemplate(
        template="""You are a SQL expert. Given the database schema and question, write a SQLite SQL query.
Return ONLY the SQL query, nothing else. No explanation, no markdown, no backticks.

Database Schema:
{schema}

Question: {question}

SQL Query:""",
        input_variables=["schema", "question"],
    )

    # Prompt 2 — summarize the executed result as a user-facing answer.
    final_answer_prompt = PromptTemplate(
        template="""Given the question, SQL query, and result, write a clear answer.

Question: {question}
SQL Query: {query}
SQL Result: {result}

Answer:""",
        input_variables=["question", "query", "result"],
    )

    def _answer_question(question: str) -> str:
        # Best-effort: any failure (bad SQL, DB error, API error) is turned
        # into a readable message rather than crashing the UI.
        try:
            schema_text = database.get_table_info()

            # Generate the SQL query text from the question + schema.
            generated_sql = (
                generate_sql_prompt | model | StrOutputParser()
            ).invoke({"schema": schema_text, "question": question}).strip()

            # Strip markdown fences the model sometimes adds despite instructions.
            generated_sql = (
                generated_sql.replace("```sql", "").replace("```", "").strip()
            )

            # Execute against SQLite.
            rows = database.run(generated_sql)

            # Turn (question, query, rows) into a natural-language answer.
            return (
                final_answer_prompt | model | StrOutputParser()
            ).invoke({"question": question, "query": generated_sql, "result": rows})
        except Exception as e:
            return f"I encountered an error processing your query: {str(e)}"

    class SQLChainWrapper:
        """Dict-in / dict-out adapter matching the orchestrator's agent contract."""

        def invoke(self, input_dict):
            return {"output": _answer_question(input_dict.get("input", ""))}

    return SQLChainWrapper()
|
app.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Streamlit chat UI for the Banking Intelligence Assistant.

Routes each user question through the LangGraph orchestrator, which picks
either the policy RAG agent or the SQL data agent, and renders the answer
in a chat transcript kept in Streamlit session state.
"""
import sys
import os
# Make the app directory importable so `agents.*` resolves regardless of cwd.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import streamlit as st
from agents.rag_agent import load_rag_agent
from agents.sql_agent import load_sql_agent
from agents.orchestrator import build_orchestrator

# ── PAGE CONFIG ──
st.set_page_config(
    page_title="HDFC Banking Intelligence Assistant",
    page_icon="🏦",
    layout="centered"
)

# ── HEADER ──
st.title(" HDFC Banking Intelligence Assistant")
st.markdown("""
Ask me anything about **HDFC Bank policies** or your **account & transaction data**.
I'll automatically route your question to the right agent.
""")
st.divider()

# ── LOAD AGENTS (cached so they load only once) ──
# @st.cache_resource memoizes across Streamlit reruns, so the vectorstore,
# DB connection and compiled graph are built a single time per process.
@st.cache_resource
def load_agents():
    with st.spinner("Loading agents... please wait "):
        rag_chain = load_rag_agent()
        sql_agent = load_sql_agent()
        orchestrator = build_orchestrator(rag_chain, sql_agent)
    return orchestrator

orchestrator = load_agents()

# ── CHAT HISTORY ──
# Session state survives reruns; initialize the transcript once.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# ── SAMPLE QUESTIONS ──
# Only shown on a fresh session, before any message has been sent.
if len(st.session_state.messages) == 0:
    st.markdown("#### Try asking:")
    col1, col2 = st.columns(2)
    with col1:
        st.info(" What is the minimum balance for a savings account?")
        st.info(" How can I raise a grievance against HDFC Bank?")
        st.info(" What are the KYC documents required?")
    with col2:
        st.info(" Which customers have overdue credit cards?")
        st.info(" Which merchant has the highest transactions?")
        st.info(" What is the average balance by account type?")

# ── CHAT INPUT ──
if query := st.chat_input("Ask your banking question here..."):

    # Add user message
    st.session_state.messages.append({"role": "user", "content": query})
    with st.chat_message("user"):
        st.markdown(query)

    # Get response — the graph expects all three state keys present.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            result = orchestrator.invoke({
                "query": query,
                "agent_used": "",
                "response": ""
            })

        response = result["response"]
        agent_used = result["agent_used"].upper()

        # Show which agent handled it
        if agent_used == "RAG":
            st.caption(" Answered by: Policy Agent (RAG)")
        else:
            st.caption(" Answered by: Data Agent (SQL)")

        st.markdown(response)

    # Save assistant message
    st.session_state.messages.append({
        "role": "assistant",
        "content": f"*[{agent_used} Agent]*\n\n{response}"
    })
|
data/database/banking.db
ADDED
|
Binary file (86 kB). View file
|
|
|
ingest.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 4 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 5 |
+
from langchain_community.vectorstores import FAISS
|
| 6 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 7 |
+
|
| 8 |
+
# ── CONFIGURATION ──
DOCS_PATH = "data/documents/"      # directory holding the source policy PDFs
VECTORSTORE_PATH = "vectorstore/"  # output directory for the FAISS index

# Policy PDFs to index; files missing on disk are skipped with a warning.
PDF_FILES = [
    "hdfc_credit_card_policy.pdf",
    "hdfc_customer_compensation_policy.pdf",
    "hdfc_grievance_policy.pdf",
    "hdfc_personal_loan_agreement.pdf",
    "hdfc_savings_account_charges.pdf",
    "hdfc_general_terms_conditions.pdf"
]
|
| 20 |
+
|
| 21 |
+
def clean_text(text: str) -> str:
    """Strip boilerplate and normalize whitespace in extracted PDF text.

    Removes the internal classification banner and "as on DD.MM.YYYY"
    date stamps, collapses long runs of newlines/whitespace, and trims
    the ends. Substitutions run in a fixed order.
    """
    substitutions = (
        (r'Classification\s*[-–]\s*Internal', ''),  # confidentiality banner
        (r'\n{3,}', '\n\n'),                        # 3+ newlines -> one blank line
        (r'\s{3,}', ' '),                           # long whitespace runs -> single space
        (r'as on \d{2}\.\d{2}\.\d{4}', ''),         # transient date stamps
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
|
| 27 |
+
|
| 28 |
+
def ingest():
    """Build the policy FAISS index: load PDFs, chunk, clean, embed, save."""
    print(" Loading PDFs...")
    pages = []
    for pdf in PDF_FILES:
        pdf_path = os.path.join(DOCS_PATH, pdf)
        if not os.path.exists(pdf_path):
            print(f" Skipping missing file: {pdf}")
            continue
        docs = PyPDFLoader(pdf_path).load()
        pages.extend(docs)
        print(f" {pdf} — {len(docs)} pages")

    print(f"\n Total pages: {len(pages)}")

    # Split
    print("\n Splitting into chunks...")
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        separators=["\n\n", "\n", ".", " "]
    ).split_documents(pages)

    # Clean each chunk, then drop fragments too short to be useful.
    for chunk in chunks:
        chunk.page_content = clean_text(chunk.page_content)
    chunks = [c for c in chunks if len(c.page_content) > 50]
    print(f" Chunks after cleaning: {len(chunks)}")

    # Embed + Save FAISS
    print("\n Building FAISS index...")
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = FAISS.from_documents(chunks, embedder)

    os.makedirs(VECTORSTORE_PATH, exist_ok=True)
    vectorstore.save_local(VECTORSTORE_PATH)
    print(f" FAISS index saved to '{VECTORSTORE_PATH}'")
    print(f" Total vectors: {vectorstore.index.ntotal}")

if __name__ == "__main__":
    ingest()
|
notebooks/prototype.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain
|
| 2 |
+
langchain-community
|
| 3 |
+
langchain-groq
|
| 4 |
+
langgraph
|
| 5 |
+
faiss-cpu
|
| 6 |
+
sentence-transformers
|
| 7 |
+
streamlit
|
| 8 |
+
python-dotenv
|
| 9 |
+
sqlalchemy
|
| 10 |
+
pandas
|
| 11 |
+
groq
|